[update] FileUtil增加读取大文件自定义方法和单纯读取方法

This commit is contained in:
its 2024-04-15 18:16:00 +08:00
parent e7b8f020ed
commit 9731faf9f8
5 changed files with 163 additions and 37 deletions

View File

@ -1,5 +1,13 @@
yexuejc-base 更新记录 yexuejc-base 更新记录
------------------ ------------------
#### version 1.5.3-jre8
**time: 2024-4-15 18:16:00** <br/>
**branch** jre8 <br/>
**update** <br/>
1. [FileUtil.java](src/main/java/com/yexuejc/base/util/FileUtil.java)增加读取大文件自定义方法和单纯读取方法
2.
---
#### version 1.5.2-jre8 #### version 1.5.2-jre8
**time2024-4-7 14:34:33** <br/> **time2024-4-7 14:34:33** <br/>
**branch** jre8 <br/> **branch** jre8 <br/>

View File

@ -0,0 +1,38 @@
package com.yexuejc.base.converter;
import java.io.IOException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.deser.std.StdScalarDeserializer;
/**
 * Jackson deserializer that produces a {@code null} {@link Integer} when the
 * incoming value is missing, empty, or cannot be parsed as an {@code int}.
 * <p>Usage: {@code @JsonDeserialize(using = IntegerNullValueDeserializer.class)}</p>
 *
 * @author: yexuejc
 * @date: 2024/4/15 18:08
 */
public class IntegerNullValueDeserializer extends StdScalarDeserializer<Integer> {
    /** The Jackson deserializer base classes are Serializable, so pin the stream version. */
    private static final long serialVersionUID = 1L;

    public IntegerNullValueDeserializer() {
        super(Integer.class);
    }

    /**
     * Reads the current value as text and, only when that text is a valid
     * {@code int}, delegates the actual conversion to the inherited parser.
     *
     * @param p    parser positioned on the value to read
     * @param ctxt deserialization context handed through to the inherited parser
     * @return the parsed value, or {@code null} when the text is not an integer
     * @throws IOException if the underlying parser fails to read the value
     */
    @Override
    public Integer deserialize(JsonParser p, DeserializationContext ctxt) throws IOException {
        String value = p.getValueAsString();
        if (!isInteger(value)) {
            return null;
        }
        return super._parseInteger(p, ctxt, Integer.class);
    }

    /**
     * Tells whether {@code s} can be parsed by {@link Integer#parseInt(String)}.
     *
     * @param s candidate text, may be {@code null}
     * @return {@code true} only when {@code s} is a parseable {@code int}
     */
    private static boolean isInteger(String s) {
        // Blank content is the expected case for this deserializer; answer it
        // directly instead of paying for a thrown NumberFormatException.
        if (s == null || s.isEmpty()) {
            return false;
        }
        try {
            Integer.parseInt(s);
            return true;
        } catch (NumberFormatException ignored) {
            // Not an int: the caller maps this to a null result.
            return false;
        }
    }
}

View File

@ -18,6 +18,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Base64; import java.util.Base64;
import java.util.List; import java.util.List;
import java.util.function.Function;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.zip.CRC32; import java.util.zip.CRC32;
@ -38,11 +39,14 @@ import io.jsonwebtoken.lang.Assert;
* @time 2017年11月3日 下午3:12:49 * @time 2017年11月3日 下午3:12:49
*/ */
public class FileUtil { public class FileUtil {
static Logger logger = Logger.getLogger(FileUtil.class.getName()); private static final Logger logger = Logger.getLogger(FileUtil.class.getName());
private FileUtil() { private FileUtil() {
} }
private static final String NEW_LINE = "\n";
private static final String CONSTANT_DOT = ".";
private static final String TYPE_TAR_GZ = ".tar.gz"; private static final String TYPE_TAR_GZ = ".tar.gz";
private static final String TYPE_CSV = ".csv"; private static final String TYPE_CSV = ".csv";
private static final String TAR_GZ = "tar.gz"; private static final String TAR_GZ = "tar.gz";
@ -61,7 +65,7 @@ public class FileUtil {
if (fileName.lastIndexOf(TYPE_TAR_GZ) > 0) { if (fileName.lastIndexOf(TYPE_TAR_GZ) > 0) {
return TAR_GZ; return TAR_GZ;
} }
return fileName.substring(fileName.lastIndexOf(".") + 1); return fileName.substring(fileName.lastIndexOf(CONSTANT_DOT) + 1);
} catch (Exception e) { } catch (Exception e) {
logger.severe("file doesn't exist or is not a file"); logger.severe("file doesn't exist or is not a file");
} }
@ -332,22 +336,19 @@ public class FileUtil {
} }
/** /**
* 分段读取大文件 * 分段读取大文件不限格式
* *
* @param csvFilePath 文件路径 * @param filePath 文件路径
* @param readFileBean 分段每次读取的bean 初始值需要设置每次读取的行数 * @param readFileBean 分段每次读取的bean 初始值需要设置每次读取的行数
* @param <T> 读取结果类型bean * @param <T> 读取结果类型bean
* @return * @return 文件分页读取内容自定义处理后及读取信息
*/ */
public static <T> ReadFileBean<T> readBigFile(String csvFilePath, ReadFileBean<T> readFileBean, Class<T> readCls) throws IOException { public static <T> ReadFileBean<T> readBigFile(String filePath, ReadFileBean<T> readFileBean, Function<List<String>, List<T>> readAfter) throws IOException {
if (!isFileExist(csvFilePath)) { if (!isFileExist(filePath)) {
throw new FileNotFoundException(String.format("解析用的csv [%s] 文件不存在。", csvFilePath)); throw new FileNotFoundException(String.format("[%s]文件不存在。", filePath));
}
if (!csvFilePath.endsWith(TYPE_CSV)) {
throw new IOException(String.format("解析用的csv [%s] 文件不是CSV文件格式。", csvFilePath));
} }
List<String> datas = new ArrayList<>(); List<String> datas = new ArrayList<>();
try (RandomAccessFile randomAccessFile = new RandomAccessFile(new File(csvFilePath), "r")) { try (RandomAccessFile randomAccessFile = new RandomAccessFile(new File(filePath), "r")) {
if (readFileBean.getPointer() < 0) { if (readFileBean.getPointer() < 0) {
readFileBean.setPointer(0); readFileBean.setPointer(0);
} }
@ -365,7 +366,35 @@ public class FileUtil {
//无数据 //无数据
return readFileBean.setDatas(new ArrayList<>()); return readFileBean.setDatas(new ArrayList<>());
} }
List<T> dataList = readAfter.apply(datas);
readFileBean.setDatas(dataList);
return readFileBean;
}
/**
 * Reads a large file segment by segment without parsing the lines.
 *
 * @param csvFilePath  path of the file to read
 * @param readFileBean paging bean for each segment; its initial value must carry the number of lines to read per call
 * @return the paged content, one String per line, together with the read information
 * @throws IOException propagated from the overload that takes a line-processing function
 */
public static ReadFileBean<String> readBigFile(String csvFilePath, ReadFileBean<String> readFileBean) throws IOException {
    // Hand the raw lines back untouched: this overload performs no conversion.
    Function<List<String>, List<String>> passThrough = rawLines -> rawLines;
    return readBigFile(csvFilePath, readFileBean, passThrough);
}
/**
* 分段读取大文件(CSV格式)
*
* @param csvFilePath 文件路径
* @param readFileBean 分段每次读取的bean 初始值需要设置每次读取的行数
* @param <T> 读取结果类型bean
* @return 文件分页读取内容转bean后及读取信息
*/
public static <T> ReadFileBean<T> readBigFile(String csvFilePath, ReadFileBean<T> readFileBean, Class<T> readCls) throws IOException {
if (!csvFilePath.endsWith(TYPE_CSV)) {
throw new IOException(String.format("[%s]文件不是CSV文件格式。", csvFilePath));
}
return readBigFile(csvFilePath, readFileBean, (datas) -> {
//csv文件处理 //csv文件处理
com.yexuejc.base.pojo.CsvToBean csvToBean = getCsvToBean(readCls); com.yexuejc.base.pojo.CsvToBean csvToBean = getCsvToBean(readCls);
readFileBean.setHeader(csvToBean.getHeader()); readFileBean.setHeader(csvToBean.getHeader());
@ -382,10 +411,12 @@ public class FileUtil {
//文件不存在header使用设置的 //文件不存在header使用设置的
datas.add(0, csvToBean.getHeader()); datas.add(0, csvToBean.getHeader());
} }
try {
List<T> dataList = readCsv(String.join("\n", datas), readCls, csvToBean.getDelimiter()); return readCsv(String.join(NEW_LINE, datas), readCls, csvToBean.getDelimiter());
readFileBean.setDatas(dataList); } catch (IOException e) {
return readFileBean; throw new RuntimeException(e);
}
});
} }
/** /**

View File

@ -4,6 +4,7 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.List; import java.util.List;
import java.util.stream.Collectors;
import com.yexuejc.base.pojo.ReadFileBean; import com.yexuejc.base.pojo.ReadFileBean;
import com.yexuejc.base.util.bean.AppnodeCertCsvBean; import com.yexuejc.base.util.bean.AppnodeCertCsvBean;
@ -54,19 +55,63 @@ public class FileUtilTest {
} }
private static void readCsvFile() throws IOException { private static void readCsvFile() throws IOException {
String path = "F:\\coding\\yexuejc-base2\\src\\test\\java\\com\\yexuejc\\base\\util\\test.csv"; String path = "F:\\coding\\yexuejc-base\\src\\test\\java\\com\\yexuejc\\base\\util\\test.csv";
List<AppnodeCertCsvBean> list = FileUtil.readCsv(path, AppnodeCertCsvBean.class, true, "enable,domain,protocol,deployHost,deployPath,uname,pwd,appnodeId", ','); // List<AppnodeCertCsvBean> list = FileUtil.readCsv(path, AppnodeCertCsvBean.class, true, "enable,domain,protocol,deployHost,deployPath,uname,pwd,appnodeId", ',');
System.out.println("***********************************************"); // System.out.println("***********************************************");
System.out.println(JsonUtil.formatPrinter(list)); // System.out.println(JsonUtil.formatPrinter(list));
System.out.println("条数:" + list.size()); // System.out.println("条数:" + list.size());
//直接把每行读取成字符串
ReadFileBean<String> readFileBean2 = new ReadFileBean<>(2);
ReadFileBean<String> bean2 = FileUtil.readBigFile(path, readFileBean2);
System.out.println("直接把每行读取成字符串============================================");
System.out.println(JsonUtil.formatPrinter(bean2));
System.out.println("直接把每行读取成字符串============================================");
//自定义每行数据的处理
ReadFileBean<AppnodeCertCsvBean> readFileBean1 = new ReadFileBean<>(2);
ReadFileBean<AppnodeCertCsvBean> bean1 = FileUtil.readBigFile(path, readFileBean1, datas -> {
if (readFileBean1.getStartRowNum() == 1) {
datas.remove(0);//跳过第一行
}
return datas.stream().map(str -> {
//自定义处理每一条数据
String[] split = str.split(",");
AppnodeCertCsvBean app = new AppnodeCertCsvBean();
app.setEnable(getValue(split, 0));
app.setDomain(getValue(split, 1));
app.setProtocol(getValue(split, 2));
app.setDeployHost(getValue(split, 3));
app.setDeployPath(getValue(split, 4));
app.setUname(getValue(split, 5));
app.setPwd(getValue(split, 6));
if (StrUtil.isNotEmpty(getValue(split, 7))) {
app.setAppnodeId(Integer.valueOf(getValue(split, 7)));
}
return app;
}).collect(Collectors.toList());
});
System.out.println("自定义每行数据的处理============================================");
System.out.println(JsonUtil.formatPrinter(bean1));
System.out.println("自定义每行数据的处理============================================");
//直接使用提供的csv文件读取
ReadFileBean<AppnodeCertCsvBean> readFileBean = new ReadFileBean<>(2); ReadFileBean<AppnodeCertCsvBean> readFileBean = new ReadFileBean<>(2);
do { do {
ReadFileBean<AppnodeCertCsvBean> bean = FileUtil.readBigFile(path, readFileBean, AppnodeCertCsvBean.class); ReadFileBean<AppnodeCertCsvBean> bean = FileUtil.readBigFile(path, readFileBean, AppnodeCertCsvBean.class);
System.out.println("============================================"); System.out.println("直接使用提供的csv文件读取============================================");
System.out.println(JsonUtil.formatPrinter(bean)); System.out.println(JsonUtil.formatPrinter(bean));
System.out.println("直接使用提供的csv文件读取============================================");
} while (readFileBean.hasNext()); } while (readFileBean.hasNext());
} }
/**
 * Returns the element at {@code index}, or an empty string when the array is
 * {@code null} or the index lies outside its bounds.
 *
 * @param value the split columns of one line, may be {@code null}
 * @param index zero-based column position
 * @return the stored element (which may itself be {@code null}), or {@code ""} when no such position exists
 */
private static String getValue(String[] value, int index) {
    // Explicit guard instead of catching Exception: same results, but no
    // exception is raised for the routine "short row" case.
    if (value == null || index < 0 || index >= value.length) {
        return "";
    }
    return value[index];
}
} }

View File

@ -2,13 +2,16 @@ package com.yexuejc.base.util.bean;
import java.io.Serializable; import java.io.Serializable;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.yexuejc.base.annotation.CsvToBean; import com.yexuejc.base.annotation.CsvToBean;
import com.yexuejc.base.converter.IntegerNullValueDeserializer;
/** /**
* *
* @author: yexuejc * @author: yexuejc
* @date: 2024/2/27 10:40 * @date: 2024/2/27 10:40
*/ */
@CsvToBean(header = "enable,domain,protocol,deployHost,deployPath,uname,pwd,appnodeId")
public class AppnodeCertCsvBean implements Serializable { public class AppnodeCertCsvBean implements Serializable {
/**是否生效Y/N*/ /**是否生效Y/N*/
private String enable; private String enable;
@ -36,6 +39,7 @@ public class AppnodeCertCsvBean implements Serializable {
/** /**
* appnode协议时且远程部署时对应的远程appnode的ApiNodeId * appnode协议时且远程部署时对应的远程appnode的ApiNodeId
*/ */
@JsonDeserialize(using = IntegerNullValueDeserializer.class)
private Integer appnodeId; private Integer appnodeId;
public String getEnable() { public String getEnable() {