[feat] 增加读取大文件(csv)

This commit is contained in:
yexuejc 2022-05-17 00:28:35 +08:00
parent 933eacc1c2
commit 9340edb0ef
6 changed files with 210 additions and 2 deletions

View File

@ -0,0 +1,21 @@
package com.yexuejc.base.annotation;
import java.lang.annotation.*;
/**
 * Declares the CSV header line for a class.
 *
 * <p>The annotation is applied at type level, is retained at runtime so it can be
 * read reflectively, and is inherited by subclasses of the annotated class.
 *
 * @author MAXF-MAC
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface CsvHeader {

    /**
     * The CSV-formatted header declared on the class.
     *
     * @return the header text
     */
    String header();
}

View File

@ -1,4 +1,6 @@
package com.yexuejc.base.util; package com.yexuejc.base.annotation;
import com.yexuejc.base.util.ObjUtil;
import java.lang.annotation.*; import java.lang.annotation.*;

View File

@ -0,0 +1,85 @@
package com.yexuejc.base.pojo;
import java.util.List;
/**
 * Progress holder for reading a large file in consecutive chunks of rows.
 *
 * <p>It carries the chunk size, the row window of the most recently stored chunk,
 * the chunk data itself, and the file offset / file length recorded by the reader.
 *
 * @param <T> element type of the rows stored in {@link #getDatas()}
 * @author maxf
 * @date 2022/5/16 21:53
 */
public class ReadFileBean<T> {

    /** Number of rows requested per read. */
    private int readRowNum;

    /** First row number of the current window (1 before any chunk is stored). */
    private int startRowNum;

    /** Last row number of the current window (0 before any chunk is stored). */
    private int endRowNum;

    /** Rows belonging to the current window. */
    private List<T> datas;

    /** File pointer position; starts at 0, i.e. the beginning of the file. */
    private long pointer;

    /** Length of the file. */
    private long fileLength;

    /**
     * Creates a bean for reading {@code readRow} rows at a time.
     *
     * @param readRow number of rows to read per chunk
     */
    public ReadFileBean(int readRow) {
        this.endRowNum = 0;
        this.startRowNum = 1;
        this.readRowNum = readRow;
    }

    /**
     * @return the first row number of the current window
     */
    public int getStartRowNum() {
        return this.startRowNum;
    }

    /**
     * @return the last row number of the current window
     */
    public int getEndRowNum() {
        return this.endRowNum;
    }

    /**
     * @return the number of rows requested per read
     */
    public int getReadRowNum() {
        return this.readRowNum;
    }

    /**
     * @return the rows stored by the last {@link #setDatas(List)} call, or {@code null} if none
     */
    public List<T> getDatas() {
        return this.datas;
    }

    /**
     * Stores a chunk and slides the row window forward by one chunk.
     *
     * <p>The window always advances by {@code readRowNum}, independent of how many
     * elements {@code datas} actually holds.
     * NOTE(review): a short final chunk therefore leaves {@code endRowNum} past the
     * real last row - confirm whether callers rely on this.
     *
     * @param datas rows of the chunk (stored as-is, may be {@code null})
     * @return this bean, for chaining
     */
    public ReadFileBean<T> setDatas(List<T> datas) {
        final int previousEnd = this.endRowNum;
        this.datas = datas;
        this.startRowNum = previousEnd + 1;
        this.endRowNum = previousEnd + this.readRowNum;
        return this;
    }

    /**
     * @return the recorded file pointer position
     */
    public long getPointer() {
        return this.pointer;
    }

    /**
     * @param pointer file pointer position to record
     * @return this bean, for chaining
     */
    public ReadFileBean<T> setPointer(long pointer) {
        this.pointer = pointer;
        return this;
    }

    /**
     * @return the recorded file length
     */
    public long getFileLength() {
        return this.fileLength;
    }

    /**
     * @param fileLength file length to record
     * @return this bean, for chaining
     */
    public ReadFileBean<T> setFileLength(long fileLength) {
        this.fileLength = fileLength;
        return this;
    }
}

View File

@ -1,12 +1,21 @@
package com.yexuejc.base.util; package com.yexuejc.base.util;
import com.yexuejc.base.annotation.CsvHeader;
import com.yexuejc.base.pojo.ReadFileBean;
import io.jsonwebtoken.lang.Assert;
import java.io.*; import java.io.*;
import java.math.BigInteger; import java.math.BigInteger;
import java.nio.MappedByteBuffer; import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException; import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Base64; import java.util.Base64;
import java.util.Collection;
import java.util.List;
import java.util.logging.Logger; import java.util.logging.Logger;
import java.util.zip.CRC32; import java.util.zip.CRC32;
@ -25,6 +34,7 @@ public class FileUtil {
} }
private static final String TYPE_TAR_GZ = ".tar.gz"; private static final String TYPE_TAR_GZ = ".tar.gz";
private static final String TYPE_CSV = ".csv";
private static final String TAR_GZ = "tar.gz"; private static final String TAR_GZ = "tar.gz";
/** /**
@ -49,7 +59,7 @@ public class FileUtil {
} }
/** /**
* 判断文件是否存在 * 判断文件是否存在,不存在就创建一个空的
* *
* @param file * @param file
*/ */
@ -405,6 +415,92 @@ public class FileUtil {
return 0; return 0;
} }
/**
 * Intended to convert CSV-formatted text into a list of objects.
 *
 * <p>NOTE: this is currently an unimplemented stub - it ignores both arguments
 * and always returns {@code null}.
 *
 * @param datas the CSV text to convert, for example:
 * <p> ------------ </p>
 * <p> id,name,age </p>
 * <p> 1,zhangsan,18 </p>
 * <p> 2,lisi,20 </p>
 * <p> ------------ </p>
 * @param cls the target type, expected to declare the fields <b>id,name,age</b>
 * @param <I> the target element type
 * @return currently always {@code null}
 */
public static <I> List readCsv(String datas, Class<I> cls) {
// TODO: not implemented yet; callers receive null.
return null;
}
/**
 * Reads the next chunk of lines from a large file, resuming at the byte offset
 * stored in {@code readFileBean}.
 *
 * <p>Up to {@code readFileBean.getReadRowNum()} lines are read starting at
 * {@code readFileBean.getPointer()}. Reading stops early at end of file. After each
 * line the bean's pointer is moved to the current file position, and the bean's
 * file length is refreshed on every call.
 *
 * <p>Lines are obtained with {@link RandomAccessFile#readLine()}, which widens each
 * byte to one char; every line is therefore re-decoded as UTF-8 through
 * {@code charsetDecode}.
 *
 * <p>When {@code path} contains ".csv", the header returned by
 * {@code getCsvHeader(readCls)} is prepended, the lines are joined with
 * {@code "\n"} and handed to {@code readCsv}; the result is stored via
 * {@code readFileBean.setDatas(...)}. For any other path no data is stored.
 * NOTE(review): if the file itself starts with a header row, the first chunk will
 * carry two header lines - confirm the expected file layout.
 *
 * <p>I/O failures are logged and swallowed; the bean is returned in whatever state
 * it reached.
 *
 * @param path         path of the file to read
 * @param readFileBean chunk state; must be initialised with the number of rows per read
 * @param readCls      row type, used to resolve the CSV header and passed to {@code readCsv}
 * @param <T>          type of the row beans
 * @return the same {@code readFileBean} instance that was passed in
 */
public <T> ReadFileBean<T> readBigFile(String path, ReadFileBean<T> readFileBean, Class<T> readCls) {
    File file = new File(path);
    judeFileExists(file);
    // try-with-resources guarantees the file handle is released even when
    // seek/length/readLine throws part-way through.
    try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
        randomAccessFile.seek(readFileBean.getPointer());
        readFileBean.setFileLength(randomAccessFile.length());
        List<String> datas = new ArrayList<>();
        for (int i = 0; i < readFileBean.getReadRowNum(); i++) {
            String s = randomAccessFile.readLine();
            if (s == null) {
                // readLine() returns null at end of file; stop instead of
                // passing null on to charsetDecode (which would throw).
                break;
            }
            datas.add(charsetDecode(s, StandardCharsets.UTF_8));
            readFileBean.setPointer(randomAccessFile.getFilePointer());
        }
        if (path.contains(TYPE_CSV)) {
            // CSV handling: prepend the declared header, then convert to beans.
            datas.add(0, getCsvHeader(readCls));
            List<T> dataList = readCsv(String.join("\n", datas), readCls);
            readFileBean.setDatas(dataList);
        }
    } catch (FileNotFoundException e) {
        logger.severe("file not found." + e.getMessage());
    } catch (IOException e) {
        logger.severe("read file error." + e.getMessage());
    }
    return readFileBean;
}
/**
 * Looks up the CSV header declared on a class through {@link CsvHeader}.
 *
 * <p>Both the annotation and its {@code header} value are passed through
 * {@code Assert.notNull} with the same message before the header is returned.
 *
 * @param cls the class expected to carry {@link CsvHeader}
 * @param <T> type of the class
 * @return the value of {@link CsvHeader#header()} on {@code cls}
 */
public static <T> String getCsvHeader(Class<T> cls) {
    // Same message is used for both checks, so build it once.
    final String requirement = cls.toString() + "类上需要添加注解@CsvHeader并指定header。";

    CsvHeader csvHeader = cls.getAnnotation(CsvHeader.class);
    Assert.notNull(csvHeader, requirement);

    String headerLine = csvHeader.header();
    Assert.notNull(headerLine, requirement);
    return headerLine;
}
/**
 * Re-decodes a string whose chars each stand for one raw byte.
 *
 * <p>Every char of {@code data} is narrowed to a {@code byte} (only the low eight
 * bits are kept) and the resulting byte sequence is decoded with {@code charset}.
 *
 * @param data    the string to re-decode; must not be {@code null}
 * @param charset the charset used to decode the recovered bytes
 * @return the decoded string
 */
public static String charsetDecode(String data, Charset charset) {
    final int length = data.length();
    byte[] rawBytes = new byte[length];
    int index = 0;
    while (index < length) {
        // Narrowing cast keeps just the low byte of the char.
        rawBytes[index] = (byte) data.charAt(index);
        index++;
    }
    return new String(rawBytes, charset);
}
/*public static void main(String[] args) { /*public static void main(String[] args) {
long size = FileUtil.size(new File("E:\\OS\\deepin-15.6-amd64\\DeepinCloudPrintServerInstaller_1.0.0.1.exe")); long size = FileUtil.size(new File("E:\\OS\\deepin-15.6-amd64\\DeepinCloudPrintServerInstaller_1.0.0.1.exe"));
System.out.println(size); System.out.println(size);

View File

@ -1,5 +1,7 @@
package com.yexuejc.base.util; package com.yexuejc.base.util;
import com.yexuejc.base.annotation.ToUeProperty;
import java.io.*; import java.io.*;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.time.LocalDate; import java.time.LocalDate;

View File

@ -1,5 +1,7 @@
package com.yexuejc.base.util; package com.yexuejc.base.util;
import com.yexuejc.base.annotation.ToUeProperty;
import java.io.Serializable; import java.io.Serializable;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.time.LocalDateTime; import java.time.LocalDateTime;