[update] FileUtil增加读取大文件自定义方法和单纯读取方法

This commit is contained in:
its 2024-04-15 18:16:00 +08:00
parent e7b8f020ed
commit 9731faf9f8
5 changed files with 163 additions and 37 deletions

View File

@ -1,5 +1,13 @@
yexuejc-base 更新记录
------------------
#### version 1.5.3-jre8
**time** <br/>
**branch** jre8 <br/>
**update** <br/>
1. [FileUtil.java](src/main/java/com/yexuejc/base/util/FileUtil.java)增加读取大文件自定义方法和单纯读取方法
2.
---
#### version 1.5.2-jre8
**time** 2024-4-7 14:34:33 <br/>
**branch** jre8 <br/>

View File

@ -0,0 +1,38 @@
package com.yexuejc.base.converter;
import java.io.IOException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.deser.std.StdScalarDeserializer;
/**
 * Jackson deserializer that produces a {@code null} {@link Integer} when the
 * incoming content is empty or cannot be read as an integer.
 * <p>Usage: {@code @JsonDeserialize(using = IntegerNullValueDeserializer.class)}</p>
 * @author: yexuejc
 * @date: 2024/4/15 18:08
 */
public class IntegerNullValueDeserializer extends StdScalarDeserializer<Integer> {

    public IntegerNullValueDeserializer() {
        super(Integer.class);
    }

    /**
     * Reads the current value as text and only hands it to the inherited
     * integer parser when that text passes {@link Integer#parseInt(String)}.
     *
     * @param p    parser positioned on the value to read
     * @param ctxt deserialization context forwarded to the inherited parser
     * @return the parsed integer, or {@code null} when the text is not an integer
     * @throws IOException propagated from the underlying parser
     */
    @Override
    public Integer deserialize(JsonParser p, DeserializationContext ctxt) throws IOException {
        final String text = p.getValueAsString();
        return looksLikeInt(text) ? super._parseInteger(p, ctxt, Integer.class) : null;
    }

    /**
     * Tells whether {@code candidate} is accepted by {@link Integer#parseInt(String)}.
     * A {@code null} candidate is rejected too, because {@code parseInt(null)}
     * raises {@link NumberFormatException}.
     */
    private static boolean looksLikeInt(String candidate) {
        boolean parsable = true;
        try {
            Integer.parseInt(candidate);
        } catch (NumberFormatException notANumber) {
            parsable = false;
        }
        return parsable;
    }
}

View File

@ -18,6 +18,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.CRC32;
@ -38,11 +39,14 @@ import io.jsonwebtoken.lang.Assert;
* @time 2017年11月3日 下午3:12:49
*/
public class FileUtil {
static Logger logger = Logger.getLogger(FileUtil.class.getName());
private static final Logger logger = Logger.getLogger(FileUtil.class.getName());
private FileUtil() {
}
private static final String NEW_LINE = "\n";
private static final String CONSTANT_DOT = ".";
private static final String TYPE_TAR_GZ = ".tar.gz";
private static final String TYPE_CSV = ".csv";
private static final String TAR_GZ = "tar.gz";
@ -61,7 +65,7 @@ public class FileUtil {
if (fileName.lastIndexOf(TYPE_TAR_GZ) > 0) {
return TAR_GZ;
}
return fileName.substring(fileName.lastIndexOf(".") + 1);
return fileName.substring(fileName.lastIndexOf(CONSTANT_DOT) + 1);
} catch (Exception e) {
logger.severe("file doesn't exist or is not a file");
}
@ -332,22 +336,19 @@ public class FileUtil {
}
/**
* 分段读取大文件
* 分段读取大文件不限格式
*
* @param csvFilePath 文件路径
* @param filePath 文件路径
* @param readFileBean 分段每次读取的bean 初始值需要设置每次读取的行数
* @param <T> 读取结果类型bean
* @return
* @return 文件分页读取内容自定义处理后及读取信息
*/
public static <T> ReadFileBean<T> readBigFile(String csvFilePath, ReadFileBean<T> readFileBean, Class<T> readCls) throws IOException {
if (!isFileExist(csvFilePath)) {
throw new FileNotFoundException(String.format("解析用的csv [%s] 文件不存在。", csvFilePath));
}
if (!csvFilePath.endsWith(TYPE_CSV)) {
throw new IOException(String.format("解析用的csv [%s] 文件不是CSV文件格式。", csvFilePath));
public static <T> ReadFileBean<T> readBigFile(String filePath, ReadFileBean<T> readFileBean, Function<List<String>, List<T>> readAfter) throws IOException {
if (!isFileExist(filePath)) {
throw new FileNotFoundException(String.format("[%s]文件不存在。", filePath));
}
List<String> datas = new ArrayList<>();
try (RandomAccessFile randomAccessFile = new RandomAccessFile(new File(csvFilePath), "r")) {
try (RandomAccessFile randomAccessFile = new RandomAccessFile(new File(filePath), "r")) {
if (readFileBean.getPointer() < 0) {
readFileBean.setPointer(0);
}
@ -365,29 +366,59 @@ public class FileUtil {
//无数据
return readFileBean.setDatas(new ArrayList<>());
}
//csv文件处理
com.yexuejc.base.pojo.CsvToBean csvToBean = getCsvToBean(readCls);
readFileBean.setHeader(csvToBean.getHeader());
if (csvToBean.hasHeader()) {
//文件存在header,设置header优先,没设置使用文件的
if (StrUtil.isNotEmpty(csvToBean.getHeader())) {
//替换header
datas.remove(0);
datas.add(0, csvToBean.getHeader());
} else {
readFileBean.setHeader(datas.get(0));
}
} else {
//文件不存在header使用设置的
datas.add(0, csvToBean.getHeader());
}
List<T> dataList = readCsv(String.join("\n", datas), readCls, csvToBean.getDelimiter());
List<T> dataList = readAfter.apply(datas);
readFileBean.setDatas(dataList);
return readFileBean;
}
/**
 * Reads a large file in segments without parsing the lines.
 *
 * <p>Delegates to {@link #readBigFile(String, ReadFileBean, Function)} with a
 * pass-through function, so the lines handed over by the segmented read are
 * stored unchanged.</p>
 *
 * @param csvFilePath  path of the file to read
 * @param readFileBean paging bean for the segmented read; its initial value must carry the number of lines per read
 * @return the segment content, one {@code String} per line, together with the read information
 * @throws IOException propagated from the delegated segmented read
 */
public static ReadFileBean<String> readBigFile(String csvFilePath, ReadFileBean<String> readFileBean) throws IOException {
    // No post-processing: the list of raw lines is handed back as-is.
    Function<List<String>, List<String>> passThrough = lines -> lines;
    return readBigFile(csvFilePath, readFileBean, passThrough);
}
/**
 * Reads a large CSV file in segments and converts each segment into beans.
 *
 * <p>The path must end with the {@code .csv} suffix. The actual segmented read is
 * delegated to {@link #readBigFile(String, ReadFileBean, Function)}; the function
 * supplied here fixes up the header line of the segment and then parses the lines
 * with {@code readCsv}.</p>
 *
 * @param csvFilePath  path of the CSV file
 * @param readFileBean paging bean for the segmented read; its initial value must carry the number of lines per read
 * @param readCls      bean class each row is converted to; its header settings are resolved through {@code getCsvToBean}
 * @param <T>          bean type of the result
 * @return the segment content converted to beans, together with the read information
 * @throws IOException if the path does not end with {@code .csv}, or if reading or parsing the file fails
 */
public static <T> ReadFileBean<T> readBigFile(String csvFilePath, ReadFileBean<T> readFileBean, Class<T> readCls) throws IOException {
    if (!csvFilePath.endsWith(TYPE_CSV)) {
        throw new IOException(String.format("[%s]文件不是CSV文件格式。", csvFilePath));
    }
    try {
        return readBigFile(csvFilePath, readFileBean, (datas) -> {
            // CSV handling
            com.yexuejc.base.pojo.CsvToBean csvToBean = getCsvToBean(readCls);
            readFileBean.setHeader(csvToBean.getHeader());
            if (csvToBean.hasHeader()) {
                // The file has a header line: a configured header takes precedence,
                // otherwise the header found in the file is used.
                // NOTE(review): this touches the first line of every segment, not only
                // the first one — confirm the segment reader (not visible here)
                // supplies the header line for later segments as well.
                if (StrUtil.isNotEmpty(csvToBean.getHeader())) {
                    // Replace the header line with the configured one.
                    datas.remove(0);
                    datas.add(0, csvToBean.getHeader());
                } else {
                    readFileBean.setHeader(datas.get(0));
                }
            } else {
                // The file has no header line: prepend the configured one.
                datas.add(0, csvToBean.getHeader());
            }
            try {
                return readCsv(String.join(NEW_LINE, datas), readCls, csvToBean.getDelimiter());
            } catch (IOException e) {
                // A Function cannot throw checked exceptions; tunnel the failure out
                // of the lambda so it can be rethrown as the declared IOException.
                throw new java.io.UncheckedIOException(e);
            }
        });
    } catch (java.io.UncheckedIOException e) {
        // Restore the checked exception so callers handling IOException see parse failures.
        throw e.getCause();
    }
}
/**
* 获取csv的header,使用注解{@link CsvToBean}
*

View File

@ -4,6 +4,7 @@ import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import com.yexuejc.base.pojo.ReadFileBean;
import com.yexuejc.base.util.bean.AppnodeCertCsvBean;
@ -54,19 +55,63 @@ public class FileUtilTest {
}
private static void readCsvFile() throws IOException {
String path = "F:\\coding\\yexuejc-base2\\src\\test\\java\\com\\yexuejc\\base\\util\\test.csv";
String path = "F:\\coding\\yexuejc-base\\src\\test\\java\\com\\yexuejc\\base\\util\\test.csv";
List<AppnodeCertCsvBean> list = FileUtil.readCsv(path, AppnodeCertCsvBean.class, true, "enable,domain,protocol,deployHost,deployPath,uname,pwd,appnodeId", ',');
System.out.println("***********************************************");
System.out.println(JsonUtil.formatPrinter(list));
System.out.println("条数:" + list.size());
// List<AppnodeCertCsvBean> list = FileUtil.readCsv(path, AppnodeCertCsvBean.class, true, "enable,domain,protocol,deployHost,deployPath,uname,pwd,appnodeId", ',');
// System.out.println("***********************************************");
// System.out.println(JsonUtil.formatPrinter(list));
// System.out.println("条数:" + list.size());
//直接把每行读取成字符串
ReadFileBean<String> readFileBean2 = new ReadFileBean<>(2);
ReadFileBean<String> bean2 = FileUtil.readBigFile(path, readFileBean2);
System.out.println("直接把每行读取成字符串============================================");
System.out.println(JsonUtil.formatPrinter(bean2));
System.out.println("直接把每行读取成字符串============================================");
//自定义每行数据的处理
ReadFileBean<AppnodeCertCsvBean> readFileBean1 = new ReadFileBean<>(2);
ReadFileBean<AppnodeCertCsvBean> bean1 = FileUtil.readBigFile(path, readFileBean1, datas -> {
if (readFileBean1.getStartRowNum() == 1) {
datas.remove(0);//跳过第一行
}
return datas.stream().map(str -> {
//自定义处理每一条数据
String[] split = str.split(",");
AppnodeCertCsvBean app = new AppnodeCertCsvBean();
app.setEnable(getValue(split, 0));
app.setDomain(getValue(split, 1));
app.setProtocol(getValue(split, 2));
app.setDeployHost(getValue(split, 3));
app.setDeployPath(getValue(split, 4));
app.setUname(getValue(split, 5));
app.setPwd(getValue(split, 6));
if (StrUtil.isNotEmpty(getValue(split, 7))) {
app.setAppnodeId(Integer.valueOf(getValue(split, 7)));
}
return app;
}).collect(Collectors.toList());
});
System.out.println("自定义每行数据的处理============================================");
System.out.println(JsonUtil.formatPrinter(bean1));
System.out.println("自定义每行数据的处理============================================");
//直接使用提供的csv文件读取
ReadFileBean<AppnodeCertCsvBean> readFileBean = new ReadFileBean<>(2);
do {
ReadFileBean<AppnodeCertCsvBean> bean = FileUtil.readBigFile(path, readFileBean, AppnodeCertCsvBean.class);
System.out.println("============================================");
System.out.println("直接使用提供的csv文件读取============================================");
System.out.println(JsonUtil.formatPrinter(bean));
System.out.println("直接使用提供的csv文件读取============================================");
} while (readFileBean.hasNext());
}
/**
 * Returns the element at {@code index}, or an empty string when it cannot be read.
 *
 * @param value array to read from; {@code null} yields an empty string
 * @param index position to read; anything outside the array yields an empty string
 * @return the stored element (which may itself be {@code null}), or {@code ""}
 */
private static String getValue(String[] value, int index) {
    // Explicit guard instead of catching the exception an invalid access would raise.
    if (value == null || index < 0 || index >= value.length) {
        return "";
    }
    return value[index];
}
}

View File

@ -2,13 +2,16 @@ package com.yexuejc.base.util.bean;
import java.io.Serializable;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.yexuejc.base.annotation.CsvToBean;
import com.yexuejc.base.converter.IntegerNullValueDeserializer;
/**
*
* @author: yexuejc
* @date: 2024/2/27 10:40
*/
@CsvToBean(header = "enable,domain,protocol,deployHost,deployPath,uname,pwd,appnodeId")
public class AppnodeCertCsvBean implements Serializable {
/**是否生效Y/N*/
private String enable;
@ -36,6 +39,7 @@ public class AppnodeCertCsvBean implements Serializable {
/**
* appnode协议时且远程部署时对应的远程appnode的ApiNodeId
*/
@JsonDeserialize(using = IntegerNullValueDeserializer.class)
private Integer appnodeId;
public String getEnable() {