From 616e7e38eaa78bedc0affcec97632b125b98f49f Mon Sep 17 00:00:00 2001 From: digua Date: Tue, 23 Dec 2025 23:03:45 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E6=97=A7=E7=89=88QQ?= =?UTF-8?q?=20txt=E7=89=88=E6=9C=AC=E7=9A=84=E8=AE=A8=E8=AE=BA=E7=BB=84?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- electron/main/parser/formats/chatlab-jsonl.ts | 11 ++- electron/main/parser/formats/chatlab.ts | 11 ++- electron/main/parser/formats/qq-native-txt.ts | 46 ++++++++--- .../parser/formats/shuakami-qq-exporter.ts | 22 +++++- .../main/parser/formats/wechat-default.ts | 8 +- ...ts => ycccccccy-echotrace-preprocessor.ts} | 0 .../parser/formats/ycccccccy-echotrace.ts | 10 ++- electron/main/parser/index.ts | 6 +- electron/main/parser/types.ts | 5 ++ electron/main/worker/core/index.ts | 13 +++- electron/main/worker/core/perfLogger.ts | 78 ++++++++++++++++++- electron/main/worker/import/streamImport.ts | 36 ++++++++- 12 files changed, 218 insertions(+), 28 deletions(-) rename electron/main/parser/formats/{echotrace-preprocessor.ts => ycccccccy-echotrace-preprocessor.ts} (100%) diff --git a/electron/main/parser/formats/chatlab-jsonl.ts b/electron/main/parser/formats/chatlab-jsonl.ts index 3809a4b..5cda8f9 100644 --- a/electron/main/parser/formats/chatlab-jsonl.ts +++ b/electron/main/parser/formats/chatlab-jsonl.ts @@ -123,7 +123,7 @@ export const feature: FormatFeature = { // ==================== 解析器实现 ==================== async function* parseChatLabJsonl(options: ParseOptions): AsyncGenerator { - const { filePath, batchSize = 5000, onProgress } = options + const { filePath, batchSize = 5000, onProgress, onLog } = options const totalBytes = getFileSize(filePath) let bytesRead = 0 @@ -134,6 +134,9 @@ async function* parseChatLabJsonl(options: ParseOptions): AsyncGenerator() @@ -258,11 +261,15 @@ async function* parseChatLabJsonl(options: ParseOptions): AsyncGenerator 0 ? members.length : memberMap.size + onLog?.('info', `解析完成: ${messagesProcessed} 条消息, ${memberCount} 个成员`) + yield { type: 'done', data: { messageCount: messagesProcessed, - memberCount: members.length > 0 ? members.length : memberMap.size, + memberCount, }, } } diff --git a/electron/main/parser/formats/chatlab.ts b/electron/main/parser/formats/chatlab.ts index dc00882..43ff9fb 100644 --- a/electron/main/parser/formats/chatlab.ts +++ b/electron/main/parser/formats/chatlab.ts @@ -76,7 +76,7 @@ interface ChatLabMember { // ==================== 解析器实现 ==================== async function* parseChatLab(options: ParseOptions): AsyncGenerator { - const { filePath, batchSize = 5000, onProgress } = options + const { filePath, batchSize = 5000, onProgress, onLog } = options const totalBytes = getFileSize(filePath) let bytesRead = 0 @@ -87,6 +87,9 @@ async function* parseChatLab(options: ParseOptions): AsyncGenerator 0 ? members.length : memberMapFromMessages.size + onLog?.('info', `解析完成: ${messagesProcessed} 条消息, ${memberCount} 个成员`) + yield { type: 'done', data: { messageCount: messagesProcessed, - memberCount: members.length > 0 ? members.length : memberMapFromMessages.size, + memberCount, }, } } diff --git a/electron/main/parser/formats/qq-native-txt.ts b/electron/main/parser/formats/qq-native-txt.ts index 6281eef..238a0f4 100644 --- a/electron/main/parser/formats/qq-native-txt.ts +++ b/electron/main/parser/formats/qq-native-txt.ts @@ -50,17 +50,19 @@ export const feature: FormatFeature = { priority: 30, extensions: ['.txt'], signatures: { - head: [/消息记录(此消息记录为文本格式/, /消息对象:/], + // 支持群聊导出和多人聊天(讨论组)导出 + head: [/消息记录(此消息记录为文本格式/, /消息对象:/, /多人聊天/], }, } // ==================== 消息头正则 ==================== -// 匹配:2019-07-16 18:15:05 夜喵大人🐱(642163903) -// 或:2019-07-16 18:15:11 铛🔔 -const MESSAGE_HEADER_REGEX = /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (.+?)(?:\(([^)]+)\)|<([^>]+)>)$/ +// 群聊格式:2019-07-16 18:15:05 地瓜(23333233) +// 邮箱格式:2019-07-16 18:15:11 土豆 +// 讨论组格式:2017-08-29 20:28:30 番茄(没有 ID,只有昵称) +const MESSAGE_HEADER_REGEX = /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (.+?)(?:\(([^)]+)\)|<([^>]+)>)?$/ -// 匹配群名:消息对象:杭州FE +// 匹配群名:消息对象:xxx const GROUP_NAME_REGEX = /^消息对象:(.+)$/ // ==================== 消息类型判断 ==================== @@ -141,17 +143,21 @@ const lastValidNickname = new Map() // ==================== 解析器实现 ==================== async function* parseTxt(options: ParseOptions): AsyncGenerator { - const { filePath, batchSize = 5000, onProgress } = options + const { filePath, batchSize = 5000, onProgress, onLog } = options const totalBytes = getFileSize(filePath) let bytesRead = 0 let messagesProcessed = 0 + let skippedLines = 0 // 跳过的无效行计数 // 发送初始进度 const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...') yield { type: 'progress', data: initialProgress } onProgress?.(initialProgress) + // 记录解析开始 + onLog?.('info', `开始解析 QQ TXT 文件,大小: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`) + // 收集数据 let groupName = '未知群聊' const memberMap = new Map() @@ -218,17 +224,19 @@ async function* parseTxt(options: ParseOptions): AsyncGenerator + // platformId: (id) 或 ,如果没有则使用昵称(讨论组格式) + let platformId = headerMatch[3] || headerMatch[4] || nickname // 如果昵称和 ID 相同,可能是系统故障,使用之前记录的昵称 - if (nickname === platformId) { + if (nickname === platformId && headerMatch[3]) { + // 只有当确实有 ID 时才检查昵称覆盖 const previousNickname = lastValidNickname.get(platformId) if (previousNickname) { nickname = previousNickname } // 如果没有之前的记录,保持使用 ID 作为昵称 - } else { - // 记录有效昵称(昵称 != ID) + } else if (headerMatch[3] || headerMatch[4]) { + // 记录有效昵称(有 ID 且昵称 != ID) lastValidNickname.set(platformId, nickname) } @@ -262,6 +270,18 @@ async function* parseTxt(options: ParseOptions): AsyncGenerator 0) { + onLog?.('info', `跳过 ${skippedLines} 行无法解析的内容`) + } + yield { type: 'done', data: { messageCount: messagesProcessed, memberCount: memberMap.size }, diff --git a/electron/main/parser/formats/shuakami-qq-exporter.ts b/electron/main/parser/formats/shuakami-qq-exporter.ts index 24f8490..dc911be 100644 --- a/electron/main/parser/formats/shuakami-qq-exporter.ts +++ b/electron/main/parser/formats/shuakami-qq-exporter.ts @@ -227,17 +227,21 @@ function convertMessageType( // ==================== 解析器实现 ==================== async function* parseV4(options: ParseOptions): AsyncGenerator { - const { filePath, batchSize = 5000, onProgress } = options + const { filePath, batchSize = 5000, onProgress, onLog } = options const totalBytes = getFileSize(filePath) let bytesRead = 0 let messagesProcessed = 0 + let skippedMessages = 0 // 跳过的无效消息计数 // 发送初始进度 const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...') yield { type: 'progress', data: initialProgress } onProgress?.(initialProgress) + // 记录解析开始 + onLog?.('info', `开始解析 QQ Chat Exporter 文件,大小: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`) + // 读取文件头获取 meta 信息(增加到 500KB 以包含 chatInfo.avatar) const headContent = readFileHeadBytes(filePath, 500000) @@ -283,7 +287,10 @@ async function* parseV4(options: ParseOptions): AsyncGenerator 0) { + onLog?.('info', `跳过 ${skippedMessages} 条无效消息(缺少发送者ID或时间戳无效)`) + } + yield { type: 'done', data: { messageCount: messagesProcessed, memberCount: memberMap.size }, diff --git a/electron/main/parser/formats/wechat-default.ts b/electron/main/parser/formats/wechat-default.ts index 437521b..80c960b 100644 --- a/electron/main/parser/formats/wechat-default.ts +++ b/electron/main/parser/formats/wechat-default.ts @@ -208,7 +208,7 @@ function extractTextContent(wechatType: number, content: string | null): string // ==================== 解析器实现 ==================== async function* parseWechatDefault(options: ParseOptions): AsyncGenerator { - const { filePath, batchSize = 5000, onProgress } = options + const { filePath, batchSize = 5000, onProgress, onLog } = options const totalBytes = getFileSize(filePath) let bytesRead = 0 @@ -219,6 +219,9 @@ async function* parseWechatDefault(options: ParseOptions): AsyncGenerator { - const { filePath, batchSize = 5000, onProgress } = options + const { filePath, batchSize = 5000, onProgress, onLog } = options const totalBytes = getFileSize(filePath) let bytesRead = 0 @@ -158,6 +158,9 @@ async function* parseEchotrace(options: ParseOptions): AsyncGenerator void onMembers: (members: ParsedMember[]) => void onMessageBatch: (messages: ParsedMessage[]) => void + /** 日志回调(可选) */ + onLog?: (level: 'info' | 'error', message: string) => void } export interface StreamParseOptions extends StreamParseCallbacks { @@ -234,9 +236,9 @@ export async function streamParseFile( filePath: string, callbacks: Omit ): Promise { - const { onProgress, onMeta, onMembers, onMessageBatch, batchSize = 5000 } = callbacks + const { onProgress, onMeta, onMembers, onMessageBatch, onLog, batchSize = 5000 } = callbacks - for await (const event of parseFile({ filePath, batchSize, onProgress })) { + for await (const event of parseFile({ filePath, batchSize, onProgress, onLog })) { switch (event.type) { case 'meta': onMeta(event.data) diff --git a/electron/main/parser/types.ts b/electron/main/parser/types.ts index 10e92ae..f204717 100644 --- a/electron/main/parser/types.ts +++ b/electron/main/parser/types.ts @@ -90,6 +90,9 @@ export interface FormatFeature { // ==================== 解析层:解析器接口 ==================== +/** 日志级别 */ +export type LogLevel = 'info' | 'error' + /** * 解析选项 */ @@ -100,6 +103,8 @@ export interface ParseOptions { batchSize?: number /** 进度回调(可选,用于外部监听) */ onProgress?: (progress: ParseProgress) => void + /** 日志回调(可选,用于记录解析过程中的信息、警告、错误) */ + onLog?: (level: LogLevel, message: string) => void } /** diff --git a/electron/main/worker/core/index.ts b/electron/main/worker/core/index.ts index 8f648b0..c424383 100644 --- a/electron/main/worker/core/index.ts +++ b/electron/main/worker/core/index.ts @@ -15,5 +15,16 @@ export { type TimeFilter, } from './dbCore' -export { initPerfLog, logPerf, logPerfDetail, resetPerfLog, getCurrentLogFile } from './perfLogger' +export { + initPerfLog, + logPerf, + logPerfDetail, + resetPerfLog, + getCurrentLogFile, + logError, + logInfo, + logSummary, + getErrorCount, + LogLevel, +} from './perfLogger' diff --git a/electron/main/worker/core/perfLogger.ts b/electron/main/worker/core/perfLogger.ts index a0ffecf..99cd512 100644 --- a/electron/main/worker/core/perfLogger.ts +++ b/electron/main/worker/core/perfLogger.ts @@ -1,17 +1,26 @@ /** - * 性能日志模块 - * 实时记录导入过程的性能指标 + * 导入日志模块 + * 实时记录导入过程的性能指标、错误和警告信息 */ import * as fs from 'fs' import * as path from 'path' import { getDbDir } from './dbCore' +// 日志级别 +export enum LogLevel { + ERROR = 'ERROR', + INFO = 'INFO', +} + // 状态 let lastLogTime = Date.now() let lastMessageCount = 0 let currentLogFile: string | null = null +// 统计计数器 +let errorCount = 0 + /** * 获取性能日志目录 */ @@ -32,7 +41,7 @@ export function initPerfLog(sessionId: string): void { const logDir = getLogDir() currentLogFile = path.join(logDir, `import_${sessionId}_${Date.now()}.log`) // 写入头部 - fs.writeFileSync(currentLogFile, `=== 导入性能日志 ===\n开始时间: ${new Date().toISOString()}\n\n`, 'utf-8') + fs.writeFileSync(currentLogFile, `=== 导入日志 ===\n开始时间: ${new Date().toISOString()}\n\n`, 'utf-8') } catch { // 忽略初始化失败 } @@ -98,6 +107,7 @@ export function resetPerfLog(): void { lastLogTime = Date.now() lastMessageCount = 0 currentLogFile = null + errorCount = 0 } /** @@ -106,3 +116,65 @@ export function resetPerfLog(): void { export function getCurrentLogFile(): string | null { return currentLogFile } + +// ==================== 通用日志函数 ==================== + +/** + * 写入日志行 + */ +function writeLogLine(level: LogLevel, message: string): void { + if (!currentLogFile) return + + const logLine = `[${new Date().toISOString()}] [${level}] ${message}\n` + try { + fs.appendFileSync(currentLogFile, logLine, 'utf-8') + } catch { + // 忽略写入失败 + } +} + +/** + * 记录错误日志 + * @param message 错误描述 + * @param error 可选的 Error 对象 + */ +export function logError(message: string, error?: Error): void { + errorCount++ + const errorDetail = error ? `: ${error.message}` : '' + writeLogLine(LogLevel.ERROR, `${message}${errorDetail}`) +} + +/** + * 记录信息日志 + * @param message 信息描述 + */ +export function logInfo(message: string): void { + writeLogLine(LogLevel.INFO, message) +} + +/** + * 获取错误计数 + */ +export function getErrorCount(): number { + return errorCount +} + +/** + * 写入日志摘要(导入完成时调用) + */ +export function logSummary(totalMessages: number, totalMembers: number): void { + if (!currentLogFile) return + + const summary = ` +=== 导入摘要 === +结束时间: ${new Date().toISOString()} +总消息数: ${totalMessages.toLocaleString()} +总成员数: ${totalMembers.toLocaleString()} +错误数: ${errorCount} +` + try { + fs.appendFileSync(currentLogFile, summary, 'utf-8') + } catch { + // 忽略 + } +} diff --git a/electron/main/worker/import/streamImport.ts b/electron/main/worker/import/streamImport.ts index dd43753..12bd62e 100644 --- a/electron/main/worker/import/streamImport.ts +++ b/electron/main/worker/import/streamImport.ts @@ -18,7 +18,15 @@ import { type ParsedMessage, } from '../../parser' import { getDbDir } from '../core' -import { initPerfLog, logPerf, logPerfDetail, resetPerfLog } from '../core' +import { + initPerfLog, + logPerf, + logPerfDetail, + resetPerfLog, + logInfo, + logError, + logSummary, +} from '../core' /** 流式导入结果 */ export interface StreamImportResult { @@ -210,6 +218,11 @@ export async function streamImport(filePath: string, requestId: string): Promise resetPerfLog() const sessionId = generateSessionId() initPerfLog(sessionId) + + // 记录导入开始信息 + logInfo(`文件路径: ${filePath}`) + logInfo(`检测到格式: ${formatFeature.name} (${formatFeature.id})`) + logInfo(`平台: ${formatFeature.platform}`) logPerf('开始导入', 0) // 预处理:如果格式需要且文件较大,先精简 @@ -218,6 +231,7 @@ export async function streamImport(filePath: string, requestId: string): Promise const preprocessor = getPreprocessor(filePath) if (preprocessor && needsPreprocess(filePath)) { + logInfo('文件需要预处理,开始精简大文件...') sendProgress(requestId, { stage: 'parsing', bytesRead: 0, @@ -235,10 +249,13 @@ export async function streamImport(filePath: string, requestId: string): Promise }) }) actualFilePath = tempFilePath + logInfo(`预处理完成,临时文件: ${tempFilePath}`) } catch (err) { + const errorMsg = `预处理失败: ${err instanceof Error ? err.message : String(err)}` + logError(errorMsg, err instanceof Error ? err : undefined) return { success: false, - error: `预处理失败: ${err instanceof Error ? err.message : String(err)}`, + error: errorMsg, } } } @@ -351,6 +368,15 @@ export async function streamImport(filePath: string, requestId: string): Promise sendProgress(requestId, progress) }, + onLog: (level, message) => { + // 将解析器日志写入导入日志文件 + if (level === 'error') { + logError(message) + } else { + logInfo(message) + } + }, + onMeta: (meta: ParsedMeta) => { if (!metaInserted) { insertMeta.run( @@ -625,8 +651,14 @@ export async function streamImport(filePath: string, requestId: string): Promise logPerf('WAL checkpoint 完成', totalMessageCount) logPerf('导入完成', totalMessageCount) + // 写入日志摘要 + logSummary(totalMessageCount, memberIdMap.size) + return { success: true, sessionId } } catch (error) { + // 记录错误日志 + logError('导入失败', error instanceof Error ? error : undefined) + // 回滚当前事务 if (inTransaction) { try {