mirror of
https://github.com/hellodigua/ChatLab.git
synced 2026-05-06 21:16:10 +08:00
fix: 修复部分 WhatsApp 聊天记录不会被正确识别的问题
This commit is contained in:
@@ -54,10 +54,16 @@ export const feature: FormatFeature = {
|
||||
signatures: {
|
||||
// WhatsApp 导出文件的特征(中文/英文)
|
||||
head: [
|
||||
/消息和通话已进行端到端加密/, // 中文
|
||||
/Messages and calls are end-to-end encrypted/i, // 英文
|
||||
/WhatsApp/i,
|
||||
/消息和通话已进行端到端加密/, // 中文加密提示
|
||||
/Messages and calls are end-to-end encrypted/i, // 英文加密提示
|
||||
/WhatsApp/i, // 通用 WhatsApp 关键词
|
||||
/你发送给自己的消息已进行端到端加密/, // 中文自己对话提示
|
||||
/有人添加了你/, // 中文群聊添加提示
|
||||
/\d{4}\/\d{1,2}\/\d{1,2} \d{1,2}:\d{2} - /, // 消息行格式特征
|
||||
/^\[\d{1,2}\/\d{1,2}\/\d{2},? \d{1,2}:\d{2}:\d{2}\] /, // 消息行格式特征 V2
|
||||
],
|
||||
// 文件名特征:与xxx的 WhatsApp 聊天.txt
|
||||
filename: [/^与.+的\s*WhatsApp\s*聊天\.txt$/i],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -74,8 +80,9 @@ function cleanLine(line: string): string {
|
||||
|
||||
// ==================== 消息头正则 ====================
|
||||
|
||||
// 格式1:2025/12/22 12:35 - 地瓜: 内容(部分地区导出格式)
|
||||
const MESSAGE_LINE_REGEX_V1 = /^(\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}) - (.+)$/
|
||||
// 格式1:2025/12/22 12:35 或 2025/2/2 9:35 - 地瓜: 内容(部分地区导出格式)
|
||||
// 支持月份、日期、小时为 1-2 位数字
|
||||
const MESSAGE_LINE_REGEX_V1 = /^(\d{4}\/\d{1,2}\/\d{1,2} \d{1,2}:\d{2}) - (.+)$/
|
||||
|
||||
// 格式2:[6/7/25 22:44:26] 或 [10/12/25, 12:50:16](中文/英文地区导出格式)
|
||||
// 日期和时间之间可能有逗号(英文)或没有(中文)
|
||||
@@ -139,7 +146,7 @@ function detectMessageType(content: string): MessageType {
|
||||
/**
|
||||
* 解析 WhatsApp 时间格式为秒级时间戳
|
||||
* 支持两种格式:
|
||||
* - 格式1:2025/12/22 12:35(YYYY/MM/DD HH:MM)
|
||||
* - 格式1:2025/12/22 12:35 或 2025/2/2 9:35(YYYY/M/D H:MM,月日时可为 1-2 位)
|
||||
* - 格式2:6/7/25 22:44:26(M/D/YY HH:MM:SS)
|
||||
*/
|
||||
function parseWhatsAppTime(timeStr: string, isV2Format: boolean = false): number {
|
||||
@@ -164,7 +171,22 @@ function parseWhatsAppTime(timeStr: string, isV2Format: boolean = false): number
|
||||
}
|
||||
}
|
||||
|
||||
// 格式1:YYYY/MM/DD HH:MM
|
||||
// 格式1:YYYY/M/D H:MM(月、日、时可为 1-2 位数字)
|
||||
const match = timeStr.match(/^(\d{4})\/(\d{1,2})\/(\d{1,2}) (\d{1,2}):(\d{2})$/)
|
||||
if (match) {
|
||||
const [, year, month, day, hour, minute] = match
|
||||
const date = new Date(
|
||||
parseInt(year, 10),
|
||||
parseInt(month, 10) - 1,
|
||||
parseInt(day, 10),
|
||||
parseInt(hour, 10),
|
||||
parseInt(minute, 10),
|
||||
0
|
||||
)
|
||||
return Math.floor(date.getTime() / 1000)
|
||||
}
|
||||
|
||||
// 兜底:尝试标准格式解析(YYYY/MM/DD HH:MM)
|
||||
const normalized = timeStr.replace(/\//g, '-').replace(' ', 'T') + ':00'
|
||||
const date = new Date(normalized)
|
||||
return Math.floor(date.getTime() / 1000)
|
||||
|
||||
@@ -35,6 +35,14 @@ function matchHeadSignatures(headContent: string, patterns: RegExp[]): boolean {
|
||||
return patterns.some((pattern) => pattern.test(headContent))
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the file name matches any of the given filename signatures.
*
* Only the last path segment (as returned by path.basename) is tested, so
* directory names in filePath never contribute to a match.
*
* @param filePath Path of the file to check
* @param patterns Filename patterns; a single matching pattern is sufficient
* @returns true when at least one pattern matches the base name, false otherwise
*          (including when patterns is empty)
*
* NOTE(review): RegExp.prototype.test is stateful for patterns carrying the
* g or y flag (it advances lastIndex) — presumably the signature patterns
* never use those flags; confirm where the patterns are declared.
|
||||
*/
|
||||
function matchFilenameSignatures(filePath: string, patterns: RegExp[]): boolean {
|
||||
const filename = path.basename(filePath)
|
||||
return patterns.some((pattern) => pattern.test(filename))
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查必需字段是否存在
|
||||
*/
|
||||
@@ -102,7 +110,7 @@ export class FormatSniffer {
|
||||
const headContent = readFileHead(filePath)
|
||||
|
||||
for (const { feature } of this.formats) {
|
||||
if (this.matchFeature(feature, ext, headContent)) {
|
||||
if (this.matchFeature(feature, ext, headContent, filePath)) {
|
||||
return feature
|
||||
}
|
||||
}
|
||||
@@ -120,7 +128,7 @@ export class FormatSniffer {
|
||||
const headContent = readFileHead(filePath)
|
||||
|
||||
for (const { feature, parser } of this.formats) {
|
||||
if (this.matchFeature(feature, ext, headContent)) {
|
||||
if (this.matchFeature(feature, ext, headContent, filePath)) {
|
||||
return parser
|
||||
}
|
||||
}
|
||||
@@ -275,7 +283,12 @@ export class FormatSniffer {
|
||||
/**
|
||||
* 检查特征是否匹配
|
||||
*/
|
||||
private matchFeature(feature: FormatFeature, ext: string, headContent: string): boolean {
|
||||
private matchFeature(
|
||||
feature: FormatFeature,
|
||||
ext: string,
|
||||
headContent: string,
|
||||
filePath?: string
|
||||
): boolean {
|
||||
// 1. 检查扩展名
|
||||
if (!feature.extensions.includes(ext)) {
|
||||
return false
|
||||
@@ -284,20 +297,33 @@ export class FormatSniffer {
|
||||
const { signatures } = feature
|
||||
|
||||
// 2. 检查文件头签名(如果定义了)
|
||||
let headMatch = true
|
||||
if (signatures.head && signatures.head.length > 0) {
|
||||
if (!matchHeadSignatures(headContent, signatures.head)) {
|
||||
headMatch = matchHeadSignatures(headContent, signatures.head)
|
||||
}
|
||||
|
||||
// 3. 检查文件名签名(如果定义了,作为文件头匹配失败的补充)
|
||||
let filenameMatch = false
|
||||
if (signatures.filename && signatures.filename.length > 0 && filePath) {
|
||||
filenameMatch = matchFilenameSignatures(filePath, signatures.filename)
|
||||
}
|
||||
|
||||
// 文件头签名或文件名签名至少有一个匹配
|
||||
if (!headMatch && !filenameMatch) {
|
||||
// 如果两个都没定义,则认为匹配(只检查扩展名)
|
||||
if ((signatures.head && signatures.head.length > 0) || (signatures.filename && signatures.filename.length > 0)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 检查必需字段(如果定义了)
|
||||
// 4. 检查必需字段(如果定义了)
|
||||
if (signatures.requiredFields && signatures.requiredFields.length > 0) {
|
||||
if (!matchRequiredFields(headContent, signatures.requiredFields)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 检查字段值模式(如果定义了)
|
||||
// 5. 检查字段值模式(如果定义了)
|
||||
if (signatures.fieldPatterns) {
|
||||
for (const [, pattern] of Object.entries(signatures.fieldPatterns)) {
|
||||
if (!pattern.test(headContent)) {
|
||||
|
||||
@@ -65,6 +65,8 @@ export interface ParseResult {
|
||||
export interface FormatSignatures {
|
||||
/** 文件头正则匹配(任意一个匹配即可) */
|
||||
head?: RegExp[]
|
||||
/** 文件名正则匹配(任意一个匹配即可,作为文件头匹配的补充) */
|
||||
filename?: RegExp[]
|
||||
/** 必须存在的 JSON 字段路径 */
|
||||
requiredFields?: string[]
|
||||
/** 字段值模式匹配 */
|
||||
|
||||
Reference in New Issue
Block a user