feat: 不同平台的聊天数据支持合并(resolve #74)

This commit is contained in:
digua
2026-04-13 23:35:02 +08:00
committed by digua
parent beea029299
commit ad4afc77d7
6 changed files with 142 additions and 46 deletions
+103 -35
View File
@@ -73,13 +73,14 @@ export async function parseFileInfo(filePath: string): Promise<FileParseInfo> {
/** /**
* 生成消息的唯一标识(用于去重和冲突检测) * 生成消息的唯一标识(用于去重和冲突检测)
*/ */
function getMessageKey(msg: ParsedMessage): string { function getMessageKey(msg: ParsedMessage, senderPlatformIdOverride?: string): string {
// 合并链路的去重语义需要和增量导入保持一致,否则两条链路会对重复消息得出不同结论。 // 合并链路的去重语义需要和增量导入保持一致,否则两条链路会对重复消息得出不同结论。
const normalizedContent = msg.content || null const normalizedContent = msg.content || null
const senderPlatformId = senderPlatformIdOverride || msg.senderPlatformId
const hash = createHash('sha256') const hash = createHash('sha256')
hash.update(String(msg.timestamp)) hash.update(String(msg.timestamp))
hash.update('\0') hash.update('\0')
hash.update(msg.senderPlatformId) hash.update(senderPlatformId)
hash.update('\0') hash.update('\0')
hash.update(normalizedContent === null ? 'null' : 'text') hash.update(normalizedContent === null ? 'null' : 'text')
hash.update('\0') hash.update('\0')
@@ -93,6 +94,54 @@ function getParsedMessageDisplayName(msg: ParsedMessage): string {
return msg.senderGroupNickname || msg.senderAccountName || msg.senderPlatformId return msg.senderGroupNickname || msg.senderAccountName || msg.senderPlatformId
} }
/**
 * Finds member platformIds that appear under more than one platform.
 *
 * Each source contributes its platform label (an empty label is counted as
 * 'unknown') for every member it lists. An id is "colliding" when at least
 * two distinct labels were recorded for it.
 *
 * @param sources - One entry per source: its platform label and its members.
 * @returns The set of colliding platformIds, in first-seen order.
 */
function getCollidingPlatformIds(
  sources: Array<{ platform: string; members: Array<{ platformId: string }> }>
): Set<string> {
  const platformsById = new Map<string, Set<string>>()

  sources.forEach(({ platform, members }) => {
    members.forEach(({ platformId }) => {
      const label = platform || 'unknown'
      const labels = platformsById.get(platformId)
      if (labels === undefined) {
        platformsById.set(platformId, new Set([label]))
      } else {
        labels.add(label)
      }
    })
  })

  // Keep only the ids that were recorded under two or more platform labels.
  return new Set(
    [...platformsById].filter(([, labels]) => labels.size > 1).map(([id]) => id)
  )
}
/**
 * Returns the id to use for a member or sender in the merged output.
 *
 * Ids that are not in `collidingIds` are returned unchanged. Colliding ids
 * are prefixed with their platform label ('unknown' when the label is empty)
 * and a colon, so the same raw id from two platforms stays distinct.
 *
 * @param platformId - Raw id as reported by the source.
 * @param platform - Platform label of the source the id came from.
 * @param collidingIds - Raw ids known to occur on more than one platform.
 * @returns Either `platformId` itself or `<platform>:<platformId>`.
 */
function normalizePlatformId(platformId: string, platform: string, collidingIds: Set<string>): string {
  if (collidingIds.has(platformId)) {
    const prefix = platform ? platform : 'unknown'
    return prefix + ':' + platformId
  }
  return platformId
}
/**
 * Finds sender platformIds that appear under more than one platform, based on
 * the senders of the given messages.
 *
 * Every message contributes its platform label (an empty label is counted as
 * 'unknown') to the sender's id. An id is "colliding" when at least two
 * distinct labels were recorded for it.
 *
 * @param allMessages - Messages paired with their source name and platform label.
 * @returns The set of colliding sender platformIds, in first-seen order.
 */
function getCollidingPlatformIdsFromMessages(
  allMessages: Array<{ msg: ParsedMessage; source: string; platform: string }>
): Set<string> {
  const platformsBySender = new Map<string, Set<string>>()

  for (const { msg, platform } of allMessages) {
    const senderId = msg.senderPlatformId
    let labels = platformsBySender.get(senderId)
    if (labels === undefined) {
      labels = new Set<string>()
      platformsBySender.set(senderId, labels)
    }
    labels.add(platform || 'unknown')
  }

  // Keep only the senders that were recorded under two or more platform labels.
  const result = new Set<string>()
  platformsBySender.forEach((labels, senderId) => {
    if (labels.size >= 2) result.add(senderId)
  })
  return result
}
/** /**
* 检查消息是否是纯图片消息 * 检查消息是否是纯图片消息
* 纯图片消息格式如:[图片: xxx.jpg]、[图片: {xxx}.jpg] 等 * 纯图片消息格式如:[图片: xxx.jpg]、[图片: {xxx}.jpg] 等
@@ -104,11 +153,13 @@ function isImageOnlyMessage(content: string | undefined): boolean {
} }
function detectConflictsInMessages( function detectConflictsInMessages(
allMessages: Array<{ msg: ParsedMessage; source: string }>, allMessages: Array<{ msg: ParsedMessage; source: string; platform: string }>,
conflicts: MergeConflict[] conflicts: MergeConflict[]
): ConflictCheckResult { ): ConflictCheckResult {
const collidingIds = getCollidingPlatformIdsFromMessages(allMessages)
// 按时间戳分组检测冲突 // 按时间戳分组检测冲突
const timeGroups = new Map<number, Array<{ msg: ParsedMessage; source: string }>>() const timeGroups = new Map<number, Array<{ msg: ParsedMessage; source: string; platform: string }>>()
for (const item of allMessages) { for (const item of allMessages) {
const ts = item.msg.timestamp const ts = item.msg.timestamp
if (!timeGroups.has(ts)) { if (!timeGroups.has(ts)) {
@@ -133,9 +184,9 @@ function detectConflictsInMessages(
if (items.length < 2) continue if (items.length < 2) continue
// 按发送者分组 // 按发送者分组
const senderGroups = new Map<string, Array<{ msg: ParsedMessage; source: string }>>() const senderGroups = new Map<string, Array<{ msg: ParsedMessage; source: string; platform: string }>>()
for (const item of items) { for (const item of items) {
const sender = item.msg.senderPlatformId const sender = normalizePlatformId(item.msg.senderPlatformId, item.platform || 'unknown', collidingIds)
if (!senderGroups.has(sender)) { if (!senderGroups.has(sender)) {
senderGroups.set(sender, []) senderGroups.set(sender, [])
} }
@@ -154,7 +205,7 @@ function detectConflictsInMessages(
} }
// 按内容分组(完全相同的内容会被分到一组,自动去重) // 按内容分组(完全相同的内容会被分到一组,自动去重)
const contentGroups = new Map<string, Array<{ msg: ParsedMessage; source: string }>>() const contentGroups = new Map<string, Array<{ msg: ParsedMessage; source: string; platform: string }>>()
for (const item of senderItems) { for (const item of senderItems) {
const content = item.msg.content || '' const content = item.msg.content || ''
if (!contentGroups.has(content)) { if (!contentGroups.has(content)) {
@@ -232,7 +283,8 @@ function detectConflictsInMessages(
// 计算去重后的消息数 // 计算去重后的消息数
const uniqueKeys = new Set<string>() const uniqueKeys = new Set<string>()
for (const item of allMessages) { for (const item of allMessages) {
uniqueKeys.add(getMessageKey(item.msg)) const normalizedSenderId = normalizePlatformId(item.msg.senderPlatformId, item.platform || 'unknown', collidingIds)
uniqueKeys.add(getMessageKey(item.msg, normalizedSenderId))
} }
console.log(`[Merger] Messages after dedup: ${uniqueKeys.size}`) console.log(`[Merger] Messages after dedup: ${uniqueKeys.size}`)
@@ -286,17 +338,26 @@ async function executeMerge(
_conflictResolutions: MergeParams['conflictResolutions'], _conflictResolutions: MergeParams['conflictResolutions'],
andAnalyze: boolean andAnalyze: boolean
): Promise<MergeResult> { ): Promise<MergeResult> {
const collidingIds = getCollidingPlatformIds(
parseResults.map(({ result }) => ({
platform: result.meta.platform || 'unknown',
members: result.members.map((m) => ({ platformId: m.platformId })),
}))
)
const memberMap = new Map<string, ChatLabMember>() const memberMap = new Map<string, ChatLabMember>()
for (const { result } of parseResults) { for (const { result } of parseResults) {
const sourcePlatform = result.meta.platform || 'unknown'
for (const member of result.members) { for (const member of result.members) {
const existing = memberMap.get(member.platformId) const normalizedMemberPlatformId = normalizePlatformId(member.platformId, sourcePlatform, collidingIds)
const existing = memberMap.get(normalizedMemberPlatformId)
if (existing) { if (existing) {
if (member.accountName) existing.accountName = member.accountName if (member.accountName) existing.accountName = member.accountName
if (member.groupNickname) existing.groupNickname = member.groupNickname if (member.groupNickname) existing.groupNickname = member.groupNickname
if (member.avatar) existing.avatar = member.avatar if (member.avatar) existing.avatar = member.avatar
} else { } else {
memberMap.set(member.platformId, { memberMap.set(normalizedMemberPlatformId, {
platformId: member.platformId, platformId: normalizedMemberPlatformId,
accountName: member.accountName, accountName: member.accountName,
groupNickname: member.groupNickname, groupNickname: member.groupNickname,
avatar: member.avatar, avatar: member.avatar,
@@ -308,13 +369,15 @@ async function executeMerge(
const seenKeys = new Set<string>() const seenKeys = new Set<string>()
const mergedMessages: ChatLabMessage[] = [] const mergedMessages: ChatLabMessage[] = []
for (const { result } of parseResults) { for (const { result } of parseResults) {
const sourcePlatform = result.meta.platform || 'unknown'
for (const msg of result.messages) { for (const msg of result.messages) {
const key = getMessageKey(msg) const normalizedSenderPlatformId = normalizePlatformId(msg.senderPlatformId, sourcePlatform, collidingIds)
const key = getMessageKey(msg, normalizedSenderPlatformId)
if (seenKeys.has(key)) continue if (seenKeys.has(key)) continue
seenKeys.add(key) seenKeys.add(key)
mergedMessages.push({ mergedMessages.push({
sender: msg.senderPlatformId, sender: normalizedSenderPlatformId,
accountName: msg.senderAccountName, accountName: msg.senderAccountName,
groupNickname: msg.senderGroupNickname, groupNickname: msg.senderGroupNickname,
timestamp: msg.timestamp, timestamp: msg.timestamp,
@@ -346,9 +409,12 @@ async function executeMerge(
description: `合并自 ${parseResults.length} 个文件`, description: `合并自 ${parseResults.length} 个文件`,
} }
const uniquePlatforms = [...new Set(parseResults.map(({ result }) => result.meta.platform || 'unknown'))]
const mergedPlatform = uniquePlatforms.length === 1 ? uniquePlatforms[0] : 'mixed'
const chatLabMeta = { const chatLabMeta = {
name: outputName, name: outputName,
platform: parseResults[0].result.meta.platform as ChatPlatform, platform: mergedPlatform as ChatPlatform,
type: parseResults[0].result.meta.type as ChatType, type: parseResults[0].result.meta.type as ChatType,
sources, sources,
groupId, groupId,
@@ -410,7 +476,7 @@ export async function checkConflictsWithTempDb(
filePaths: string[], filePaths: string[],
tempDbCache: Map<string, string> tempDbCache: Map<string, string>
): Promise<ConflictCheckResult> { ): Promise<ConflictCheckResult> {
const allMessages: Array<{ msg: ParsedMessage; source: string }> = [] const allMessages: Array<{ msg: ParsedMessage; source: string; platform: string }> = []
const conflicts: MergeConflict[] = [] const conflicts: MergeConflict[] = []
console.log('[Merger] checkConflictsWithTempDb: Checking conflicts') console.log('[Merger] checkConflictsWithTempDb: Checking conflicts')
@@ -443,23 +509,13 @@ export async function checkConflictsWithTempDb(
// 流式读取消息,避免一次性加载到内存 // 流式读取消息,避免一次性加载到内存
reader.streamMessages(10000, (messages) => { reader.streamMessages(10000, (messages) => {
for (const msg of messages) { for (const msg of messages) {
allMessages.push({ msg, source: sourceName }) allMessages.push({ msg, source: sourceName, platform: meta?.platform || 'unknown' })
} }
}) })
} }
console.log(`[Merger] Total messages: ${allMessages.length}`) console.log(`[Merger] Total messages: ${allMessages.length}`)
// 检查格式一致性
const platforms = readers.map((r) => r.getMeta()?.platform || 'unknown')
const uniquePlatforms = [...new Set(platforms)]
if (uniquePlatforms.length > 1) {
throw new Error(
`不支持合并不同格式的聊天记录。\n检测到的格式:${uniquePlatforms.join('、')}\n请确保所有文件使用相同的导出工具和格式。`
)
}
console.log('[Merger] Format check passed:', uniquePlatforms[0])
return detectConflictsInMessages(allMessages, conflicts) return detectConflictsInMessages(allMessages, conflicts)
} finally { } finally {
// 关闭所有 reader // 关闭所有 reader
@@ -519,11 +575,20 @@ export async function mergeFilesWithTempDb(
parseResults.push({ meta, members, source: sourceName, reader }) parseResults.push({ meta, members, source: sourceName, reader })
} }
const collidingIds = getCollidingPlatformIds(
parseResults.map(({ meta, members }) => ({
platform: meta.platform || 'unknown',
members: members.map((m) => ({ platformId: m.platformId })),
}))
)
// 合并成员 // 合并成员
const memberMap = new Map<string, ChatLabMember>() const memberMap = new Map<string, ChatLabMember>()
for (const { members } of parseResults) { for (const { meta, members } of parseResults) {
const sourcePlatform = meta.platform || 'unknown'
for (const member of members) { for (const member of members) {
const existing = memberMap.get(member.platformId) const normalizedMemberPlatformId = normalizePlatformId(member.platformId, sourcePlatform, collidingIds)
const existing = memberMap.get(normalizedMemberPlatformId)
if (existing) { if (existing) {
if (member.accountName) { if (member.accountName) {
existing.accountName = member.accountName existing.accountName = member.accountName
@@ -536,8 +601,8 @@ export async function mergeFilesWithTempDb(
existing.avatar = member.avatar existing.avatar = member.avatar
} }
} else { } else {
memberMap.set(member.platformId, { memberMap.set(normalizedMemberPlatformId, {
platformId: member.platformId, platformId: normalizedMemberPlatformId,
accountName: member.accountName, accountName: member.accountName,
groupNickname: member.groupNickname, groupNickname: member.groupNickname,
avatar: member.avatar, avatar: member.avatar,
@@ -553,13 +618,15 @@ export async function mergeFilesWithTempDb(
let totalProcessed = 0 let totalProcessed = 0
const startTime = Date.now() const startTime = Date.now()
for (const { reader, source } of parseResults) { for (const { meta, reader, source } of parseResults) {
const sourcePlatform = meta.platform || 'unknown'
const readerStartTime = Date.now() const readerStartTime = Date.now()
let readerCount = 0 let readerCount = 0
reader.streamMessages(10000, (messages) => { reader.streamMessages(10000, (messages) => {
for (const msg of messages) { for (const msg of messages) {
const key = getMessageKey(msg) const normalizedSenderPlatformId = normalizePlatformId(msg.senderPlatformId, sourcePlatform, collidingIds)
const key = getMessageKey(msg, normalizedSenderPlatformId)
// 跳过已处理的消息(去重) // 跳过已处理的消息(去重)
if (seenKeys.has(key)) { if (seenKeys.has(key)) {
@@ -571,7 +638,7 @@ export async function mergeFilesWithTempDb(
// 决定了哪个版本的消息先被处理,后续相同 key 的消息会被跳过 // 决定了哪个版本的消息先被处理,后续相同 key 的消息会被跳过
mergedMessages.push({ mergedMessages.push({
sender: msg.senderPlatformId, sender: normalizedSenderPlatformId,
accountName: msg.senderAccountName, accountName: msg.senderAccountName,
groupNickname: msg.senderGroupNickname, groupNickname: msg.senderGroupNickname,
timestamp: msg.timestamp, timestamp: msg.timestamp,
@@ -596,8 +663,9 @@ export async function mergeFilesWithTempDb(
console.log(`[Merger] Messages after merge: ${mergedMessages.length}`) console.log(`[Merger] Messages after merge: ${mergedMessages.length}`)
// 确定平台(使用第一个文件的平台 // 确定平台(跨平台时标记为 mixed
const platform = parseResults[0].meta.platform const uniquePlatforms = [...new Set(parseResults.map((r) => r.meta.platform || 'unknown'))]
const platform = uniquePlatforms.length === 1 ? uniquePlatforms[0] : 'mixed'
// 确定群ID和群头像(仅当所有文件都来自同一个群时保留) // 确定群ID和群头像(仅当所有文件都来自同一个群时保留)
const groupIds = new Set(parseResults.map((r) => r.meta.groupId).filter(Boolean)) const groupIds = new Set(parseResults.map((r) => r.meta.groupId).filter(Boolean))
+2 -1
View File
@@ -32,9 +32,10 @@
}, },
"clickToEdit": "Click to edit name", "clickToEdit": "Click to edit name",
"merge": "Merge Selected", "merge": "Merge Selected",
"mergeHint": "Select 2 or more chat records from the same platform to merge", "mergeHint": "Select at least 2 chat records of the same chat type to merge",
"mergeConfirmTitle": "Merge Chat Records", "mergeConfirmTitle": "Merge Chat Records",
"mergeConfirmMessage": "This will merge {count} selected chat records. The original records will be deleted and a new merged record will be created.", "mergeConfirmMessage": "This will merge {count} selected chat records. The original records will be deleted and a new merged record will be created.",
"mergeRiskWarning": "Critical warning: this merge cannot be undone, and cross-platform context differences may cause semantic confusion. Please back up your original data before continuing.",
"mergeSteps": { "mergeSteps": {
"exporting": "Exporting chat records...", "exporting": "Exporting chat records...",
"parsing": "Parsing files...", "parsing": "Parsing files...",
+2 -1
View File
@@ -32,9 +32,10 @@
}, },
"clickToEdit": "クリックして名前を編集", "clickToEdit": "クリックして名前を編集",
"merge": "選択項目を結合", "merge": "選択項目を結合",
"mergeHint": "結合するには同じプラットフォームのチャット履歴を 2 件以上選択してください", "mergeHint": "結合するには同じチャット種別の履歴を 2 件以上選択してください",
"mergeConfirmTitle": "チャット履歴を結合", "mergeConfirmTitle": "チャット履歴を結合",
"mergeConfirmMessage": "選択した {count} 件のチャット履歴を結合します。元の履歴は削除され、新しい統合履歴が作成されます。", "mergeConfirmMessage": "選択した {count} 件のチャット履歴を結合します。元の履歴は削除され、新しい統合履歴が作成されます。",
"mergeRiskWarning": "重要な警告:この結合は元に戻せず、プラットフォーム間の文脈差により意味が混在する可能性があります。続行前に元データをバックアップしてください。",
"mergeSteps": { "mergeSteps": {
"exporting": "チャット履歴をエクスポート中...", "exporting": "チャット履歴をエクスポート中...",
"parsing": "ファイルを解析中...", "parsing": "ファイルを解析中...",
+3 -2
View File
@@ -32,9 +32,10 @@
}, },
"clickToEdit": "点击编辑名称", "clickToEdit": "点击编辑名称",
"merge": "合并选中", "merge": "合并选中",
"mergeHint": "选择 2 个以上同平台的聊天记录才能合并", "mergeHint": "选择 2 个以上同聊天类型的聊天记录才能合并",
"mergeConfirmTitle": "合并聊天记录", "mergeConfirmTitle": "合并聊天记录",
"mergeConfirmMessage": "将合并选中的 {count} 个聊天记录。合并后原记录被删除,生成一条新的合并记录。", "mergeConfirmMessage": "将合并选中的 {count} 个聊天记录。合并后原记录被删除,生成一条新的合并记录。",
"mergeRiskWarning": "强提醒:合并后不可恢复,且由于跨平台上下文差异,聊天语义可能出现错位或混乱。请确认你已备份原始数据并理解风险。",
"mergeSteps": { "mergeSteps": {
"exporting": "正在导出聊天记录...", "exporting": "正在导出聊天记录...",
"parsing": "正在解析文件...", "parsing": "正在解析文件...",
+2 -1
View File
@@ -32,9 +32,10 @@
}, },
"clickToEdit": "點擊即可編輯名稱", "clickToEdit": "點擊即可編輯名稱",
"merge": "合併所選項目", "merge": "合併所選項目",
"mergeHint": "選擇 2 個以上同平台的聊天紀錄才能合併", "mergeHint": "選擇 2 個以上同聊天類型的聊天紀錄才能合併",
"mergeConfirmTitle": "合併聊天紀錄", "mergeConfirmTitle": "合併聊天紀錄",
"mergeConfirmMessage": "即將合併所選的 {count} 個聊天紀錄。合併後原始紀錄會被刪除,並建立一筆新的合併紀錄。", "mergeConfirmMessage": "即將合併所選的 {count} 個聊天紀錄。合併後原始紀錄會被刪除,並建立一筆新的合併紀錄。",
"mergeRiskWarning": "強提醒:合併後無法復原,且跨平台上下文差異可能導致語意錯位或混亂。請先備份原始資料並確認你已理解風險。",
"mergeSteps": { "mergeSteps": {
"exporting": "正在匯出聊天紀錄...", "exporting": "正在匯出聊天紀錄...",
"parsing": "正在解析檔案...", "parsing": "正在解析檔案...",
@@ -194,12 +194,13 @@ const editingId = ref<string | null>(null)
// 编辑中的名称 // 编辑中的名称
const editingName = ref('') const editingName = ref('')
// 是否可以合并(选中 2 个以上同平台会话) const selectedMergeSessions = computed(() => sessions.value.filter((s) => selectedIds.value.has(s.id)))
const selectedMergeTypes = computed(() => new Set(selectedMergeSessions.value.map((s) => s.type)))
// 是否可以合并(选中 2 个以上同类型会话)
const canMerge = computed(() => { const canMerge = computed(() => {
if (selectedIds.value.size < 2) return false if (selectedIds.value.size < 2) return false
const selectedSessions = sessions.value.filter((s) => selectedIds.value.has(s.id)) return selectedMergeTypes.value.size === 1
const platforms = new Set(selectedSessions.map((s) => s.platform))
return platforms.size === 1
}) })
// 全选状态(基于过滤后的列表) // 全选状态(基于过滤后的列表)
@@ -269,6 +270,16 @@ function handleRowClick(index: number, id: string, event: MouseEvent) {
lastClickedIndex.value = index lastClickedIndex.value = index
} }
/**
 * Mousedown handler for a session row.
 *
 * When Shift is held and the press did not originate inside an input,
 * textarea, or contenteditable element, the event's default action is
 * cancelled so the browser's Shift range text selection does not highlight
 * text inside the rows.
 *
 * @param event - The mousedown event raised on the row.
 */
function handleRowMouseDown(event: MouseEvent) {
  if (event.shiftKey) {
    const origin = event.target as HTMLElement | null
    const insideEditable = origin?.closest('input, textarea, [contenteditable="true"]')
    if (!insideEditable) {
      // Suppress the browser's default Shift range text selection on row text.
      event.preventDefault()
    }
  }
}
// 判断是否选中 // 判断是否选中
function isSelected(id: string): boolean { function isSelected(id: string): boolean {
return selectedIds.value.has(id) return selectedIds.value.has(id)
@@ -606,6 +617,7 @@ onMounted(() => {
isSelected(session.id) ? 'bg-pink-50 dark:bg-pink-900/20' : '', isSelected(session.id) ? 'bg-pink-50 dark:bg-pink-900/20' : '',
index !== sortedSessions.length - 1 ? 'border-b border-gray-100 dark:border-gray-800' : '', index !== sortedSessions.length - 1 ? 'border-b border-gray-100 dark:border-gray-800' : '',
]" ]"
@mousedown="handleRowMouseDown"
@click="handleRowClick(index, session.id, $event)" @click="handleRowClick(index, session.id, $event)"
> >
<!-- 复选框 --> <!-- 复选框 -->
@@ -694,7 +706,7 @@ onMounted(() => {
</div> </div>
<!-- 合并确认弹窗 --> <!-- 合并确认弹窗 -->
<UModal v-model:open="showMergeModal"> <UModal v-model:open="showMergeModal" :ui="{ content: 'z-100' }">
<template #content> <template #content>
<div class="p-4"> <div class="p-4">
<div class="mb-4 flex items-center gap-3"> <div class="mb-4 flex items-center gap-3">
@@ -710,10 +722,22 @@ onMounted(() => {
{{ t('tools.batchManage.mergeConfirmMessage', { count: selectedIds.size }) }} {{ t('tools.batchManage.mergeConfirmMessage', { count: selectedIds.size }) }}
</p> </p>
<!-- 强提醒 -->
<div
class="mb-4 rounded-lg border border-red-200 bg-red-50 px-3 py-2 dark:border-red-800/50 dark:bg-red-900/20"
>
<div class="flex items-start gap-2">
<UIcon name="i-heroicons-exclamation-triangle" class="mt-0.5 h-4 w-4 shrink-0 text-red-600" />
<p class="text-xs leading-5 text-red-700 dark:text-red-300">
{{ t('tools.batchManage.mergeRiskWarning') }}
</p>
</div>
</div>
<!-- 选中的会话预览 --> <!-- 选中的会话预览 -->
<div class="mb-4 max-h-40 overflow-y-auto rounded-lg border border-gray-200 dark:border-gray-700"> <div class="mb-4 max-h-40 overflow-y-auto rounded-lg border border-gray-200 dark:border-gray-700">
<div <div
v-for="session in sessions.filter((s) => selectedIds.has(s.id))" v-for="session in selectedMergeSessions"
:key="session.id" :key="session.id"
class="flex items-center gap-2 border-b border-gray-100 px-3 py-2 last:border-b-0 dark:border-gray-800" class="flex items-center gap-2 border-b border-gray-100 px-3 py-2 last:border-b-0 dark:border-gray-800"
> >