feat: 实现聊天记录合并功能

2026-05-06 13:06:09 +08:00 · 2025-12-01 00:20:56 +08:00
parent 479d81960e
commit e7de9cc57d
18 changed files with 1606 additions and 94 deletions
@@ -8,6 +8,9 @@ import * as databaseCore from './database/core'
 import * as worker from './worker'
 // 导入解析器模块
 import * as parser from './parser'
+// 导入合并模块
+import * as merger from './merger'
+import type { MergeParams } from '../../src/types/chat'

 console.log('[IpcMain] Database, Worker and Parser modules imported')

@@ -575,6 +578,58 @@ const mainIpcMain = (win: BrowserWindow) => {
      }
    }
  )
+
+  // ==================== 合并功能 ====================
+
+  /**
+   * 解析文件获取基本信息（用于合并预览）
+   * 使用 Worker 线程异步执行，不阻塞主进程
+   */
+  ipcMain.handle('merge:parseFileInfo', async (_, filePath: string) => {
+    try {
+      // 使用 Worker 线程解析，避免阻塞 UI
+      return await worker.parseFileInfo(filePath)
+    } catch (error) {
+      console.error('解析文件信息失败：', error)
+      throw error
+    }
+  })
+
+  /**
+   * 检测合并冲突
+   */
+  ipcMain.handle('merge:checkConflicts', async (_, filePaths: string[]) => {
+    try {
+      return merger.checkConflicts(filePaths)
+    } catch (error) {
+      console.error('检测冲突失败：', error)
+      throw error
+    }
+  })
+
+  /**
+   * 执行合并
+   */
+  ipcMain.handle('merge:mergeFiles', async (_, params: MergeParams) => {
+    try {
+      return merger.mergeFiles(params)
+    } catch (error) {
+      console.error('合并失败：', error)
+      return { success: false, error: String(error) }
+    }
+  })
+
+  /**
+   * 显示打开对话框（通用）
+   */
+  ipcMain.handle('dialog:showOpenDialog', async (_, options) => {
+    try {
+      return await dialog.showOpenDialog(options)
+    } catch (error) {
+      console.error('显示对话框失败：', error)
+      throw error
+    }
+  })
 }

 export default mainIpcMain
@@ -0,0 +1,406 @@
+/**
+ * 聊天记录合并模块
+ * 支持多个聊天记录文件合并为 ChatLab 专属格式
+ */
+
+import * as fs from 'fs'
+import * as path from 'path'
+import { app } from 'electron'
+import { parseFile, detectFormat } from '../parser'
+import { importData } from '../database/core'
+import type {
+  ParseResult,
+  ParsedMessage,
+  ChatLabFormat,
+  ChatLabMember,
+  ChatLabMessage,
+  FileParseInfo,
+  MergeConflict,
+  ConflictCheckResult,
+  ConflictResolution,
+  MergeParams,
+  MergeResult,
+  ChatPlatform,
+  ChatType,
+  MergeSource,
+} from '../../../src/types/chat'
+
+/**
+ * 获取默认输出目录
+ */
+function getDefaultOutputDir(): string {
+  try {
+    const docPath = app.getPath('documents')
+    return path.join(docPath, 'ChatLab', 'merged')
+  } catch {
+    return path.join(process.cwd(), 'merged')
+  }
+}
+
+/**
+ * 确保输出目录存在
+ */
+function ensureOutputDir(dir: string): void {
+  if (!fs.existsSync(dir)) {
+    fs.mkdirSync(dir, { recursive: true })
+  }
+}
+
+/**
+ * 生成输出文件名
+ */
+function generateOutputFilename(name: string): string {
+  const date = new Date().toISOString().slice(0, 10).replace(/-/g, '')
+  const safeName = name.replace(/[/\\?%*:|"<>]/g, '_')
+  return `${safeName}_merged_${date}.chatlab.json`
+}
+
+/**
+ * 解析文件获取基本信息（用于预览）
+ */
+export function parseFileInfo(filePath: string): FileParseInfo {
+  const format = detectFormat(filePath)
+  if (!format) {
+    throw new Error('无法识别文件格式')
+  }
+
+  const result = parseFile(filePath)
+
+  return {
+    name: result.meta.name,
+    format,
+    platform: result.meta.platform,
+    messageCount: result.messages.length,
+    memberCount: result.members.length,
+  }
+}
+
+/**
+ * 生成消息的唯一标识（用于去重和冲突检测）
+ */
+function getMessageKey(msg: ParsedMessage): string {
+  return `${msg.timestamp}_${msg.senderPlatformId}_${(msg.content || '').length}`
+}
+
+/**
+ * 检测合并冲突
+ * 规则：时间戳 + 用户名 + 字符长度，当两项相同但另一项不同时报告冲突
+ */
+export function checkConflicts(filePaths: string[]): ConflictCheckResult {
+  const allMessages: Array<{ msg: ParsedMessage; source: string }> = []
+  const conflicts: MergeConflict[] = []
+
+  console.log('[Merger] checkConflicts: 开始检测冲突')
+  console.log(
+    '[Merger] 文件列表:',
+    filePaths.map((p) => path.basename(p))
+  )
+
+  // 先检查格式一致性
+  const formats: string[] = []
+  for (const filePath of filePaths) {
+    const format = detectFormat(filePath)
+    if (format) {
+      formats.push(format)
+    } else {
+      throw new Error(`无法识别文件格式: ${path.basename(filePath)}`)
+    }
+  }
+
+  // 检查是否所有文件格式一致
+  const uniqueFormats = [...new Set(formats)]
+  if (uniqueFormats.length > 1) {
+    throw new Error(
+      `不支持合并不同格式的聊天记录。\n检测到的格式：${uniqueFormats.join('、')}\n请确保所有文件使用相同的导出工具和格式。`
+    )
+  }
+  console.log('[Merger] 格式检查通过:', uniqueFormats[0])
+
+  // 解析所有文件
+  for (const filePath of filePaths) {
+    const result = parseFile(filePath)
+    const sourceName = path.basename(filePath)
+    console.log(`[Merger] 解析 ${sourceName}: ${result.messages.length} 条消息`)
+    for (const msg of result.messages) {
+      allMessages.push({ msg, source: sourceName })
+    }
+  }
+  console.log(`[Merger] 总消息数: ${allMessages.length}`)
+
+  // 按时间戳分组检测冲突
+  const timeGroups = new Map<number, Array<{ msg: ParsedMessage; source: string }>>()
+  for (const item of allMessages) {
+    const ts = item.msg.timestamp
+    if (!timeGroups.has(ts)) {
+      timeGroups.set(ts, [])
+    }
+    timeGroups.get(ts)!.push(item)
+  }
+  console.log(`[Merger] 唯一时间戳数: ${timeGroups.size}`)
+
+  // 统计有多条消息的时间戳
+  let multiMsgTsCount = 0
+  for (const [, items] of timeGroups) {
+    if (items.length > 1) multiMsgTsCount++
+  }
+  console.log(`[Merger] 有多条消息的时间戳数: ${multiMsgTsCount}`)
+
+  // 检测每个时间戳内的冲突
+  for (const [ts, items] of timeGroups) {
+    if (items.length < 2) continue
+
+    // 按发送者分组
+    const senderGroups = new Map<string, Array<{ msg: ParsedMessage; source: string }>>()
+    for (const item of items) {
+      const sender = item.msg.senderPlatformId
+      if (!senderGroups.has(sender)) {
+        senderGroups.set(sender, [])
+      }
+      senderGroups.get(sender)!.push(item)
+    }
+
+    // 检测同一时间戳同一发送者的不同内容
+    for (const [sender, senderItems] of senderGroups) {
+      if (senderItems.length < 2) continue
+
+      // 检查是否来自不同文件
+      const sources = new Set(senderItems.map((it) => it.source))
+      if (sources.size < 2) {
+        // 所有消息来自同一个文件，跳过（这是同一文件内同一秒内多条消息的情况）
+        continue
+      }
+
+      // 按内容长度分组
+      const lengthGroups = new Map<number, Array<{ msg: ParsedMessage; source: string }>>()
+      for (const item of senderItems) {
+        const len = (item.msg.content || '').length
+        if (!lengthGroups.has(len)) {
+          lengthGroups.set(len, [])
+        }
+        lengthGroups.get(len)!.push(item)
+      }
+
+      // 如果有多个不同长度的消息，说明可能是冲突
+      if (lengthGroups.size > 1) {
+        const lengthEntries = Array.from(lengthGroups.entries())
+        for (let i = 0; i < lengthEntries.length - 1; i++) {
+          for (let j = i + 1; j < lengthEntries.length; j++) {
+            const [len1, items1] = lengthEntries[i]
+            const [len2, items2] = lengthEntries[j]
+
+            // 找到两个来源不同的消息
+            const item1 = items1[0]
+            const item2 = items2.find((it) => it.source !== item1.source)
+
+            // 如果找不到来自不同文件的消息，跳过
+            if (!item2) continue
+
+            // 打印冲突详情
+            if (conflicts.length < 5) {
+              console.log(`[Merger] 冲突 #${conflicts.length + 1}:`)
+              console.log(`  时间戳: ${ts} (${new Date(ts * 1000).toLocaleString()})`)
+              console.log(`  发送者: ${sender} (${item1.msg.senderName})`)
+              console.log(
+                `  文件1: ${item1.source}, 长度: ${len1}, 内容: "${(item1.msg.content || '').slice(0, 50)}..."`
+              )
+              console.log(
+                `  文件2: ${item2.source}, 长度: ${len2}, 内容: "${(item2.msg.content || '').slice(0, 50)}..."`
+              )
+            }
+
+            conflicts.push({
+              id: `conflict_${ts}_${sender}_${conflicts.length}`,
+              timestamp: ts,
+              sender: item1.msg.senderName || sender,
+              contentLength1: len1,
+              contentLength2: len2,
+              content1: item1.msg.content || '',
+              content2: item2.msg.content || '',
+            })
+          }
+        }
+      }
+    }
+  }
+
+  console.log(`[Merger] 检测到冲突数: ${conflicts.length}`)
+
+  // 计算去重后的消息数
+  const uniqueKeys = new Set<string>()
+  for (const item of allMessages) {
+    uniqueKeys.add(getMessageKey(item.msg))
+  }
+  console.log(`[Merger] 去重后消息数: ${uniqueKeys.size}`)
+
+  return {
+    conflicts,
+    totalMessages: uniqueKeys.size,
+  }
+}
+
+/**
+ * 合并多个聊天记录文件
+ */
+export function mergeFiles(params: MergeParams): MergeResult {
+  try {
+    const { filePaths, outputName, outputDir, conflictResolutions, andAnalyze } = params
+
+    // 解析所有文件
+    const parseResults: Array<{ result: ParseResult; source: string }> = []
+    for (const filePath of filePaths) {
+      const result = parseFile(filePath)
+      parseResults.push({ result, source: path.basename(filePath) })
+    }
+
+    // 合并成员
+    const memberMap = new Map<string, ChatLabMember>()
+    for (const { result } of parseResults) {
+      for (const member of result.members) {
+        const existing = memberMap.get(member.platformId)
+        if (existing) {
+          // 如果昵称不同，添加到 aliases
+          if (existing.name !== member.name && !existing.aliases?.includes(member.name)) {
+            existing.aliases = existing.aliases || []
+            existing.aliases.push(member.name)
+          }
+        } else {
+          memberMap.set(member.platformId, {
+            platformId: member.platformId,
+            name: member.name,
+          })
+        }
+      }
+    }
+
+    // 合并消息（带冲突解决和去重）
+    const resolutionMap = new Map(conflictResolutions.map((r) => [r.id, r.resolution]))
+    const allMessages: Array<{ msg: ParsedMessage; source: string }> = []
+
+    for (const { result, source } of parseResults) {
+      for (const msg of result.messages) {
+        allMessages.push({ msg, source })
+      }
+    }
+
+    // 去重逻辑
+    const messageMap = new Map<string, ChatLabMessage[]>()
+    const processedConflicts = new Set<string>()
+
+    for (const { msg } of allMessages) {
+      const key = getMessageKey(msg)
+
+      // 检查是否是冲突消息
+      const conflictId = conflictResolutions.find((c) => {
+        return c.id.includes(`${msg.timestamp}_${msg.senderPlatformId}`)
+      })?.id
+
+      if (conflictId && !processedConflicts.has(conflictId)) {
+        processedConflicts.add(conflictId)
+        const resolution = resolutionMap.get(conflictId)
+
+        // 根据解决方案处理
+        if (resolution === 'keepBoth') {
+          // 保留两者：不去重
+        } else if (resolution === 'keep1' || resolution === 'keep2') {
+          // 保留其中一个：跳过另一个（简化处理，保留第一个遇到的）
+        }
+      }
+
+      // 添加消息
+      if (!messageMap.has(key)) {
+        messageMap.set(key, [])
+      }
+
+      const chatLabMsg: ChatLabMessage = {
+        sender: msg.senderPlatformId,
+        name: msg.senderName,
+        timestamp: msg.timestamp,
+        type: msg.type,
+        content: msg.content,
+      }
+
+      // 只添加一次（去重）
+      const existing = messageMap.get(key)!
+      if (existing.length === 0) {
+        existing.push(chatLabMsg)
+      }
+    }
+
+    // 扁平化并排序
+    const mergedMessages = Array.from(messageMap.values())
+      .flat()
+      .sort((a, b) => a.timestamp - b.timestamp)
+
+    // 确定平台
+    const platforms = new Set(parseResults.map((r) => r.result.meta.platform))
+    const platform = platforms.size === 1 ? parseResults[0].result.meta.platform : 'mixed'
+
+    // 构建来源信息
+    const sources: MergeSource[] = parseResults.map(({ result, source }) => ({
+      filename: source,
+      platform: result.meta.platform,
+      messageCount: result.messages.length,
+    }))
+
+    // 构建 ChatLab 格式
+    const chatLabData: ChatLabFormat = {
+      chatlab: {
+        version: '1.0.0',
+        exportedAt: Math.floor(Date.now() / 1000),
+        generator: 'ChatLab Merge Tool',
+      },
+      meta: {
+        name: outputName,
+        platform: platform as ChatPlatform,
+        type: parseResults[0].result.meta.type as ChatType,
+        sources,
+      },
+      members: Array.from(memberMap.values()),
+      messages: mergedMessages,
+    }
+
+    // 写入文件
+    const targetDir = outputDir || getDefaultOutputDir()
+    ensureOutputDir(targetDir)
+    const filename = generateOutputFilename(outputName)
+    const outputPath = path.join(targetDir, filename)
+
+    fs.writeFileSync(outputPath, JSON.stringify(chatLabData, null, 2), 'utf-8')
+
+    // 如果需要分析，导入数据库
+    let sessionId: string | undefined
+    if (andAnalyze) {
+      // 将 ChatLab 格式转换为 ParseResult
+      const parseResult: ParseResult = {
+        meta: {
+          name: chatLabData.meta.name,
+          platform: chatLabData.meta.platform,
+          type: chatLabData.meta.type,
+        },
+        members: chatLabData.members.map((m) => ({
+          platformId: m.platformId,
+          name: m.name,
+        })),
+        messages: chatLabData.messages.map((msg) => ({
+          senderPlatformId: msg.sender,
+          senderName: msg.name,
+          timestamp: msg.timestamp,
+          type: msg.type,
+          content: msg.content,
+        })),
+      }
+      sessionId = importData(parseResult)
+    }
+
+    return {
+      success: true,
+      outputPath,
+      sessionId,
+    }
+  } catch (err) {
+    return {
+      success: false,
+      error: err instanceof Error ? err.message : '合并失败',
+    }
+  }
+}
@@ -0,0 +1,84 @@
+/**
+ * ChatLab 专属 JSON 格式解析器
+ * 支持 ChatLab 工具导出的统一格式
+ */
+
+import type { ChatParser } from './types'
+import {
+  ChatPlatform,
+  ChatType,
+  type ParseResult,
+  type ParsedMember,
+  type ParsedMessage,
+  type ChatLabFormat,
+} from '../../../src/types/chat'
+
+/**
+ * ChatLab JSON 格式解析器
+ */
+export const chatlabJsonParser: ChatParser = {
+  name: 'ChatLab JSON',
+  platform: 'chatlab',
+
+  detect(content: string, filename: string): boolean {
+    // 检查文件扩展名
+    if (!filename.toLowerCase().endsWith('.json') && !filename.toLowerCase().endsWith('.chatlab.json')) {
+      return false
+    }
+
+    try {
+      const data = JSON.parse(content)
+      // 检查是否有 ChatLab 格式特征
+      return (
+        data.chatlab &&
+        typeof data.chatlab.version === 'string' &&
+        data.meta &&
+        Array.isArray(data.members) &&
+        Array.isArray(data.messages)
+      )
+    } catch {
+      return false
+    }
+  },
+
+  parse(content: string, _filename: string): ParseResult {
+    let data: ChatLabFormat
+    try {
+      data = JSON.parse(content)
+    } catch (e) {
+      throw new Error(`JSON 解析失败: ${e}`)
+    }
+
+    if (!data.chatlab || !data.meta || !Array.isArray(data.messages)) {
+      throw new Error('无效的 ChatLab JSON 格式')
+    }
+
+    // 解析元信息
+    const meta = {
+      name: data.meta.name,
+      platform: (data.meta.platform as ChatPlatform) || ChatPlatform.UNKNOWN,
+      type: (data.meta.type as ChatType) || ChatType.GROUP,
+    }
+
+    // 解析成员
+    const members: ParsedMember[] = data.members.map((m) => ({
+      platformId: m.platformId,
+      name: m.name,
+    }))
+
+    // 解析消息
+    const messages: ParsedMessage[] = data.messages.map((msg) => ({
+      senderPlatformId: msg.sender,
+      senderName: msg.name,
+      timestamp: msg.timestamp,
+      type: msg.type,
+      content: msg.content,
+    }))
+
+    return {
+      meta,
+      members,
+      messages,
+    }
+  },
+}
@@ -5,14 +5,16 @@

 import * as fs from 'fs'
 import type { ChatParser } from './types'
+import { chatlabJsonParser } from './chatlabJsonParser'
 import { qqJsonParser } from './qqJsonParser'
 import { qqTxtParser } from './qqTxtParser'
 import type { ParseResult } from '../../../src/types/chat'

 // 注册所有解析器（按优先级排序）
 const parsers: ChatParser[] = [
-  qqJsonParser, // JSON 格式优先
-  qqTxtParser // TXT 格式兜底
+  chatlabJsonParser, // ChatLab 格式最优先
+  qqJsonParser, // QQ JSON 格式
+  qqTxtParser, // TXT 格式兜底
 ]

 /**
@@ -64,10 +66,9 @@ export function detectFormat(filePath: string): string | null {
 export function getSupportedFormats(): Array<{ name: string; platform: string }> {
  return parsers.map((p) => ({
    name: p.name,
-    platform: p.platform
+    platform: p.platform,
  }))
 }

 // 导出类型
 export type { ChatParser, ParseError } from './types'
-
@@ -36,6 +36,28 @@ import {
  getMemeBattleAnalysis,
  getCheckInAnalysis,
 } from './queryAdvanced'
+import { parseFile, detectFormat } from '../parser'
+import type { FileParseInfo } from '../../../src/types/chat'
+
+/**
+ * 解析文件获取基本信息（在 Worker 线程中执行，不阻塞主进程）
+ */
+function parseFileInfo(filePath: string): FileParseInfo {
+  const format = detectFormat(filePath)
+  if (!format) {
+    throw new Error('无法识别文件格式')
+  }
+
+  const result = parseFile(filePath)
+
+  return {
+    name: result.meta.name,
+    format,
+    platform: result.meta.platform,
+    messageCount: result.messages.length,
+    memberCount: result.members.length,
+  }
+}

 // 初始化数据库目录
 initDbDir(workerData.dbDir)
@@ -50,6 +72,9 @@ interface WorkerMessage {

 // 消息类型到处理函数的映射
 const handlers: Record<string, (payload: any) => any> = {
+  // 文件解析（合并功能使用）
+  parseFileInfo: (p) => parseFileInfo(p.filePath),
+
  // 基础查询
  getAvailableYears: (p) => getAvailableYears(p.sessionId),
  getMemberActivity: (p) => getMemberActivity(p.sessionId, p.filter),
@@ -31,4 +31,6 @@ export {
  getAllSessions,
  getSession,
  closeDatabase,
+  // 文件解析 API（异步，用于合并功能）
+  parseFileInfo,
 } from './workerManager'
@@ -259,6 +259,13 @@ export async function closeDatabase(sessionId: string): Promise<void> {
  return sendToWorker('closeDatabase', { sessionId })
 }

+/**
+ * 解析文件获取基本信息（在 Worker 线程中执行）
+ */
+export async function parseFileInfo(filePath: string): Promise<any> {
+  return sendToWorker('parseFileInfo', { filePath })
+}
+
 /**
 * 获取数据库目录（供外部使用）
 */