From 878d120fb06eebd61c82b29f204ea518f209b41d Mon Sep 17 00:00:00 2001 From: digua Date: Wed, 24 Dec 2025 00:06:20 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81WhatsApp=E5=8E=9F?= =?UTF-8?q?=E7=94=9F=E6=A0=BC=E5=BC=8F=E6=B6=88=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- electron/main/parser/formats/index.ts | 12 +- .../parser/formats/whatsapp-native-txt.ts | 339 ++++++++++++++++++ electron/main/worker/import/streamImport.ts | 1 + .../Overview/OverviewIdentityCard.vue | 2 +- src/components/charts/RankList.vue | 4 +- .../private-chat/components/OverviewTab.vue | 33 +- 6 files changed, 369 insertions(+), 22 deletions(-) create mode 100644 electron/main/parser/formats/whatsapp-native-txt.ts diff --git a/electron/main/parser/formats/index.ts b/electron/main/parser/formats/index.ts index e4d6f53..297298d 100644 --- a/electron/main/parser/formats/index.ts +++ b/electron/main/parser/formats/index.ts @@ -12,6 +12,7 @@ import shuakamiQqExporter from './shuakami-qq-exporter' import yccccccyEchotrace from './ycccccccy-echotrace' import wechatDefault from './wechat-default' import qqNativeTxt from './qq-native-txt' +import whatsappNativeTxt from './whatsapp-native-txt' /** * 所有支持的格式模块(按优先级排序) @@ -22,8 +23,17 @@ export const formats: FormatModule[] = [ shuakamiQqExporter, // 优先级 10 - shuakami/qq-chat-exporter yccccccyEchotrace, // 优先级 15 - ycccccccy/echotrace 微信导出 wechatDefault, // 优先级 20 - 微信数据库导出 JSON + whatsappNativeTxt, // 优先级 25 - WhatsApp 官方导出 TXT qqNativeTxt, // 优先级 30 - QQ 官方导出 TXT ] // 按名称导出,方便单独使用 -export { chatlab, chatlabJsonl, shuakamiQqExporter, yccccccyEchotrace, wechatDefault, qqNativeTxt } +export { + chatlab, + chatlabJsonl, + shuakamiQqExporter, + yccccccyEchotrace, + wechatDefault, + qqNativeTxt, + whatsappNativeTxt, +} diff --git a/electron/main/parser/formats/whatsapp-native-txt.ts b/electron/main/parser/formats/whatsapp-native-txt.ts new file mode 100644 index 0000000..48b4be7 --- 
// Patch header carried over from the original text: /dev/null +++ b/electron/main/parser/formats/whatsapp-native-txt.ts @@ -0,0 +1,339 @@
/**
 * Parser for the plain-text (TXT) chat export produced by WhatsApp.
 *
 * Format characteristics handled here:
 * - The export contains the notice "消息和通话已进行端到端加密".
 * - Chat message:      YYYY/MM/DD HH:MM - nickname: content
 * - System notice:     YYYY/MM/DD HH:MM - text (no "nickname: " prefix)
 * - Media placeholder: <省略影音内容>
 * - Lines that do not start with a timestamp continue the previous message.
 */

import * as fs from 'fs'
import * as path from 'path'
import * as readline from 'readline'
import { KNOWN_PLATFORMS, ChatType, MessageType } from '../../../../src/types/base'
import type {
  FormatFeature,
  FormatModule,
  Parser,
  ParseOptions,
  ParseEvent,
  ParsedMeta,
  ParsedMember,
  ParsedMessage,
} from '../types'
import { getFileSize, createProgress } from '../utils'

// ==================== Helpers ====================

/**
 * Derives the chat name from the export file name.
 *
 * Examples:
 * - "与开心每一天的 WhatsApp 聊天.txt" -> "开心每一天"
 * - "与gaoberry37的 WhatsApp 聊天.txt" -> "gaoberry37"
 *
 * @param filePath Path of the exported TXT file.
 * @returns The name captured from the "与…的 WhatsApp 聊天.txt" pattern; otherwise the
 *          base name without ".txt", or "未知聊天" when that is empty.
 */
function extractNameFromFilePath(filePath: string): string {
  const basename = path.basename(filePath)
  const match = /^与(.+?)的\s*WhatsApp\s*聊天\.txt$/i.exec(basename)
  if (match) {
    return (match[1] ?? '').trim()
  }
  // Fallback: just drop the extension.
  return basename.replace(/\.txt$/i, '') || '未知聊天'
}

// ==================== Format feature ====================

export const feature: FormatFeature = {
  id: 'whatsapp-native-txt',
  name: 'WhatsApp 官方导出 (TXT)',
  platform: KNOWN_PLATFORMS.WHATSAPP,
  priority: 25,
  extensions: ['.txt'],
  signatures: {
    // Text expected near the start of a WhatsApp export.
    head: [/消息和通话已进行端到端加密/, /WhatsApp/i],
  },
}

// ==================== Line cleanup ====================

// Whitespace plus invisible marks (BOM, LTR/RTL mark, zero-width characters, word joiner)
// at either end of a line.
const EDGE_INVISIBLE_REGEX =
  /^[\s\uFEFF\u200E\u200F\u200B\u200C\u200D\u2060]+|[\s\uFEFF\u200E\u200F\u200B\u200C\u200D\u2060]+$/g

/**
 * Strips whitespace and invisible Unicode marks from both ends of a line.
 *
 * The previous version only stripped invisible marks at the start, so a mark that followed
 * leading whitespace, or sat at the end of the line, survived.
 */
function cleanLine(line: string): string {
  return line.replace(EDGE_INVISIBLE_REGEX, '')
}

// ==================== Line patterns ====================

// Header line: "2025/12/22 12:35 - 地瓜: 内容" or "2025/12/22 12:35 - 系统消息内容".
const MESSAGE_LINE_REGEX = /^(\d{4}\/\d{2}\/\d{2} \d{2}:\d{2}) - (.+)$/

// Splits "nickname: content". The content part is optional so that "nickname:" still
// yields a sender: lines are trimmed before matching, which removes the space after the
// colon when the first line of a multi-line message is empty.
const SENDER_CONTENT_REGEX = /^(.+?):(?: (.*))?$/

// ==================== System notices ====================

const SYSTEM_MESSAGE_PATTERNS = [
  /消息和通话已进行端到端加密/,
  /创建了此群组/,
  /加入群组/,
  /添加了/,
  /退出了群组/,
  /移除了/,
  /更改了本群组/,
  /已将此群组的设置更改为/,
  /这条消息已删除/,
]

// Notices that mention the group itself; one of these on a system line is used as a
// signal that the export is a group chat (heuristic).
const GROUP_NOTICE_REGEX = /创建了此群组|加入群组|退出了群组|更改了本群组|已将此群组的设置更改为/

/** Returns true when `content` contains any of the known system-notice phrases. */
function isSystemMessage(content: string): boolean {
  return SYSTEM_MESSAGE_PATTERNS.some((pattern) => pattern.test(content))
}

// ==================== Message type detection ====================

/**
 * Classifies a message body.
 *
 * @param content   Message text (without the "nickname: " prefix).
 * @param hasSender Pass true when the text was written by a chat member. The system-notice
 *                  phrases are then ignored, so ordinary sentences that happen to contain
 *                  e.g. "添加了" stay TEXT. Defaults to false (phrases are checked).
 * @returns IMAGE for the media placeholder, FILE for attachment markers, RECALL for the
 *          deleted-message text, SYSTEM for system notices, otherwise TEXT.
 */
function detectMessageType(content: string, hasSender = false): MessageType {
  const trimmed = content.trim()

  // Media placeholder (all omitted media is filed under IMAGE).
  if (trimmed === '<省略影音内容>') return MessageType.IMAGE
  if (trimmed.includes('<已附加:') || trimmed.includes('<附件:')) return MessageType.FILE

  // Deleted message.
  if (trimmed === '这条消息已删除') return MessageType.RECALL

  if (!hasSender && isSystemMessage(trimmed)) return MessageType.SYSTEM

  return MessageType.TEXT
}

// ==================== Time parsing ====================

/**
 * Converts a WhatsApp timestamp into epoch seconds.
 *
 * @param timeStr Text in the form "2025/12/22 12:35".
 * @returns Seconds since the epoch, or NaN when the text is not a valid date.
 */
function parseWhatsAppTime(timeStr: string): number {
  // "YYYY/MM/DD HH:MM" -> "YYYY-MM-DDTHH:MM:00"
  const normalized = timeStr.replace(/\//g, '-').replace(' ', 'T') + ':00'
  return Math.floor(new Date(normalized).getTime() / 1000)
}

// ==================== Member bookkeeping ====================

interface MemberInfo {
  platformId: string
  nickname: string
}

/** A message that is still being assembled from one header line plus continuation lines. */
interface PendingMessage {
  timestamp: number
  /** null marks a system notice. */
  sender: string | null
  contentLines: string[]
}

// ==================== Parser ====================

/**
 * Reads a WhatsApp TXT export line by line and emits parse events in this order:
 * an initial progress event, `meta`, `members`, message batches, a final progress event
 * and `done`.
 *
 * All messages are collected before `meta` is emitted because the chat type is derived
 * from the whole file. Progress during reading is reported through `onProgress` only.
 *
 * @param options Parse options; `batchSize` defaults to 5000 messages per batch.
 */
async function* parseWhatsApp(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
  const { filePath, batchSize = 5000, onProgress, onLog } = options

  const totalBytes = getFileSize(filePath)
  let bytesRead = 0
  let messagesProcessed = 0
  let skippedLines = 0
  let sawGroupNotice = false

  // Initial progress.
  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...')
  yield { type: 'progress', data: initialProgress }
  onProgress?.(initialProgress)

  onLog?.('info', `开始解析 WhatsApp TXT 文件,大小: ${(totalBytes / 1024 / 1024).toFixed(2)} MB`)

  const chatName = extractNameFromFilePath(filePath)
  const memberMap = new Map<string, MemberInfo>()
  const messages: ParsedMessage[] = []

  // Message currently being assembled (may span several lines).
  let currentMessage: PendingMessage | null = null

  // Stores the pending message (if any) and registers its sender as a member.
  const saveCurrentMessage = (): void => {
    const pending = currentMessage
    if (!pending) return
    currentMessage = null

    const content = pending.contentLines.join('\n').trim()
    // A line without a sender is always a system notice, even if it matches no known phrase.
    const type = pending.sender === null ? MessageType.SYSTEM : detectMessageType(content, true)

    // System notices share one fixed id and display name.
    const senderPlatformId = pending.sender ?? 'system'
    const senderName = pending.sender ?? '系统消息'

    messages.push({
      senderPlatformId,
      senderAccountName: senderName,
      timestamp: pending.timestamp,
      type,
      content: content || null,
    })

    // System notices are not members.
    if (pending.sender !== null) {
      memberMap.set(senderPlatformId, { platformId: senderPlatformId, nickname: senderName })
    }

    messagesProcessed++
  }

  const fileStream = fs.createReadStream(filePath, { encoding: 'utf-8' })
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  })

  fileStream.on('data', (chunk: string | Buffer) => {
    bytesRead += typeof chunk === 'string' ? Buffer.byteLength(chunk) : chunk.length
  })

  try {
    for await (const line of rl) {
      const cleanedLine = cleanLine(line)

      const headerMatch = MESSAGE_LINE_REGEX.exec(cleanedLine)
      const timestamp = headerMatch ? parseWhatsAppTime(headerMatch[1] ?? '') : Number.NaN

      // A timestamp-shaped prefix that is not a real date is treated as ordinary text.
      if (headerMatch && !Number.isNaN(timestamp)) {
        saveCurrentMessage()

        const restContent = headerMatch[2] ?? ''
        const senderMatch = SENDER_CONTENT_REGEX.exec(restContent)
        const sender = senderMatch ? (senderMatch[1] ?? '').trim() : ''

        // Only the part before the first colon decides whether this is a system notice;
        // testing the whole text would misclassify member messages that merely contain
        // one of the notice phrases (including "nickname: 这条消息已删除").
        if (senderMatch && sender && !isSystemMessage(sender)) {
          currentMessage = {
            timestamp,
            sender,
            contentLines: [senderMatch[2] ?? ''],
          }
        } else {
          currentMessage = {
            timestamp,
            sender: null,
            contentLines: [restContent],
          }
          if (GROUP_NOTICE_REGEX.test(restContent)) {
            sawGroupNotice = true
          }
        }

        // Periodic progress report.
        if (messagesProcessed % 500 === 0) {
          const progress = createProgress(
            'parsing',
            bytesRead,
            totalBytes,
            messagesProcessed,
            `已处理 ${messagesProcessed} 条消息...`
          )
          onProgress?.(progress)
        }

        continue
      }

      // Not a header: continuation of the current message, or unparseable text.
      if (currentMessage && cleanedLine) {
        currentMessage.contentLines.push(cleanedLine)
      } else if (cleanedLine) {
        skippedLines++
      }
    }
  } finally {
    // Release the file handle even when reading fails part-way.
    rl.close()
    fileStream.destroy()
  }

  // Last message in the file.
  saveCurrentMessage()

  // Chat type: a group-only system notice or more than two speakers means a group chat.
  // memberMap never contains system notices, so its size is the number of real speakers.
  const chatType = sawGroupNotice || memberMap.size > 2 ? ChatType.GROUP : ChatType.PRIVATE

  const meta: ParsedMeta = {
    name: chatName,
    platform: KNOWN_PLATFORMS.WHATSAPP,
    type: chatType,
  }
  yield { type: 'meta', data: meta }

  const members: ParsedMember[] = Array.from(memberMap.values()).map((m) => ({
    platformId: m.platformId,
    accountName: m.nickname,
  }))
  yield { type: 'members', data: members }

  // Messages in batches.
  for (let i = 0; i < messages.length; i += batchSize) {
    yield { type: 'messages', data: messages.slice(i, i + batchSize) }
  }

  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
  yield { type: 'progress', data: doneProgress }
  onProgress?.(doneProgress)

  // Per-type counts for the summary log.
  const typeCounts = new Map<MessageType, number>()
  for (const msg of messages) {
    typeCounts.set(msg.type, (typeCounts.get(msg.type) ?? 0) + 1)
  }

  onLog?.('info', `解析完成: ${messagesProcessed} 条消息, ${memberMap.size} 个成员, 类型: ${chatType}`)
  onLog?.(
    'info',
    `消息类型统计: ${Array.from(typeCounts.entries())
      .map(([type, count]) => `${type}=${count}`)
      .join(', ')}`
  )
  if (skippedLines > 0) {
    onLog?.('info', `跳过 ${skippedLines} 行无法解析的内容`)
  }

  yield {
    type: 'done',
    data: { messageCount: messagesProcessed, memberCount: memberMap.size },
  }
}

// ==================== Exported parser ====================

export const parser_: Parser = {
  feature,
  parse: parseWhatsApp,
}

// ==================== Exported format module ====================

const module_: FormatModule = {
  feature,
  parser: parser_,
  // The TXT format needs no preprocessor.
}

export default module_
// Next patch section carried over from the original text: diff --git a/electron/main/worker/import/streamImport.ts b/electron/main/worker/import/streamImport.ts index 12bd62e..8596f93 100644 --- a/electron/main/worker/import/streamImport.ts +++ b/electron/main/worker/import/streamImport.ts @@ -379,6 +379,7 @@ export async function streamImport(filePath: string, requestId: string): Promise onMeta: (meta: ParsedMeta) => { if (!metaInserted) { +
logInfo(`写入 meta: name=${meta.name}, type=${meta.type}`) insertMeta.run( meta.name, meta.platform, diff --git a/src/components/analysis/Overview/OverviewIdentityCard.vue b/src/components/analysis/Overview/OverviewIdentityCard.vue index cec178c..c4d9a74 100644 --- a/src/components/analysis/Overview/OverviewIdentityCard.vue +++ b/src/components/analysis/Overview/OverviewIdentityCard.vue @@ -19,7 +19,7 @@ defineProps<{

- {{ session.memberCount > 2 ? '群聊' : '私聊' }} · + 私聊 · 数据分析报告

diff --git a/src/components/charts/RankList.vue b/src/components/charts/RankList.vue index dad1f14..2a46069 100644 --- a/src/components/charts/RankList.vue +++ b/src/components/charts/RankList.vue @@ -60,7 +60,7 @@ function getRelativePercentage(index: number): number {
-

+

{{ member.name }}

@@ -77,7 +77,7 @@ function getRelativePercentage(index: number): number { -
+
{{ member.value }} {{ unit }} ({{ member.percentage }}%)
diff --git a/src/pages/private-chat/components/OverviewTab.vue b/src/pages/private-chat/components/OverviewTab.vue index b20fe55..cc04cdc 100644 --- a/src/pages/private-chat/components/OverviewTab.vue +++ b/src/pages/private-chat/components/OverviewTab.vue @@ -2,13 +2,7 @@ import { computed, ref, watch } from 'vue' import type { AnalysisSession, MessageType } from '@/types/base' import { getMessageTypeName } from '@/types/base' -import type { - MemberActivity, - HourlyActivity, - DailyActivity, - WeekdayActivity, - MonthlyActivity, -} from '@/types/analysis' +import type { MemberActivity, HourlyActivity, DailyActivity, WeekdayActivity, MonthlyActivity } from '@/types/analysis' import { DoughnutChart } from '@/components/charts' import type { DoughnutChartData } from '@/components/charts' import { SectionCard } from '@/components/UI' @@ -64,25 +58,28 @@ const typeChartData = computed(() => { } }) -// 双方消息对比数据 +// 双方消息对比数据(取消息数最多的两个成员) const memberComparisonData = computed(() => { - if (props.memberActivity.length !== 2) return null + // 私聊页面需要至少 2 个成员才能对比 + if (props.memberActivity.length < 2) return null + // 按消息数排序,取前两名 const sorted = [...props.memberActivity].sort((a, b) => b.messageCount - a.messageCount) - const total = sorted[0].messageCount + sorted[1].messageCount + const top2 = sorted.slice(0, 2) + const total = top2[0].messageCount + top2[1].messageCount return { member1: { - name: sorted[0].name, - avatar: sorted[0].avatar, - count: sorted[0].messageCount, - percentage: total > 0 ? Math.round((sorted[0].messageCount / total) * 100) : 0, + name: top2[0].name, + avatar: top2[0].avatar, + count: top2[0].messageCount, + percentage: total > 0 ? Math.round((top2[0].messageCount / total) * 100) : 0, }, member2: { - name: sorted[1].name, - avatar: sorted[1].avatar, - count: sorted[1].messageCount, - percentage: total > 0 ? 
Math.round((sorted[1].messageCount / total) * 100) : 0, + name: top2[1].name, + avatar: top2[1].avatar, + count: top2[1].messageCount, + percentage: total > 0 ? Math.round((top2[1].messageCount / total) * 100) : 0, }, total, }