Files
ChatLab/electron/main/parser/formats/ycccccccy-echotrace.ts
T

323 lines
8.8 KiB
TypeScript

/**
* ycccccccy/echotrace 微信导出格式解析器
* 适配项目: https://github.com/ycccccccy/echotrace
*
* 特征:
* - 顶层包含 session 和 messages 字段
* - session.wxid: 微信ID(群聊以 @chatroom 结尾)
* - session.type: "群聊" 或 "私聊"
* - messages[].type: 中文消息类型字符串
* - messages[].senderUsername: 发送者微信ID
* - messages[].senderDisplayName: 发送者显示名
*
* 注意:localType 字段不可信,不使用
*/
import * as fs from 'fs'
import * as path from 'path'
import { parser } from 'stream-json'
import { pick } from 'stream-json/filters/Pick'
import { streamValues } from 'stream-json/streamers/StreamValues'
import { chain } from 'stream-chain'
import { ChatPlatform, ChatType, MessageType } from '../../../../src/types/chat'
import type {
FormatFeature,
FormatModule,
Parser,
ParseOptions,
ParseEvent,
ParsedMeta,
ParsedMember,
ParsedMessage,
} from '../types'
import { getFileSize, createProgress, readFileHeadBytes } from '../utils'
// ==================== Helpers ====================
/**
 * Derives a chat name from a file path: the base name with a trailing
 * `.json` extension (matched case-insensitively) removed.
 *
 * @param filePath - Path of the exported file.
 * @returns The stripped base name, or '未知聊天' when nothing remains.
 */
function extractNameFromFilePath(filePath: string): string {
  const stem = path.basename(filePath).replace(/\.json$/i, '')
  if (stem.length === 0) {
    return '未知聊天'
  }
  return stem
}
// ==================== Feature definition ====================
/**
 * Format descriptor for ycccccccy/echotrace WeChat exports.
 *
 * Declares the `.json` extension, head patterns for the `session`,
 * `senderUsername` and `senderDisplayName` keys, and the required top-level
 * fields `session` and `messages`.
 * NOTE(review): how `priority` and `signatures` are evaluated is decided by
 * the format detector, which is not visible here — confirm before changing.
 */
export const feature: FormatFeature = {
id: 'ycccccccy-echotrace',
name: 'ycccccccy/echotrace 微信导出',
platform: ChatPlatform.WECHAT,
priority: 15,
extensions: ['.json'],
signatures: {
// Detect the top-level field and the per-message sender keys
head: [/"session"\s*:/, /"senderUsername"\s*:/, /"senderDisplayName"\s*:/],
requiredFields: ['session', 'messages'],
},
}
// ==================== Message structures ====================
/**
 * Shape of the top-level `session` object in an echotrace export.
 * The parser in this file reads `type`, `wxid`, `displayName` and `nickname`;
 * the remaining fields are declared but not consumed here.
 */
interface EchotraceSession {
wxid: string // WeChat ID; group chats end with `@chatroom`
nickname: string
remark: string
displayName: string
type: '群聊' | '私聊' // '群聊' = group chat, '私聊' = private chat
lastTimestamp: number
messageCount: number
}
/**
 * Shape of one element of the top-level `messages` array in an echotrace export.
 */
interface EchotraceMessage {
localId: number
createTime: number // Unix timestamp (seconds)
formattedTime: string
type: string // message type as a Chinese label
localType: number // unreliable, not used
content: string
isSend: number | null // 0 = received, 1 = sent, null = system
senderUsername: string // sender's WeChat ID
senderDisplayName: string // sender's display name
source: string
}
// ==================== Message type mapping ====================
/**
 * Lookup table from echotrace's Chinese type labels to the standard
 * MessageType. Built on the first call to convertMessageType and reused.
 */
let echotraceTypeTable: Map<string, MessageType> | undefined

/**
 * Converts an echotrace Chinese message-type label to the standard MessageType.
 *
 * @param typeStr - The `type` string of an exported message.
 * @returns The matching MessageType, or `MessageType.OTHER` for any label that
 *   is not in the table (including the exporter's "unknown type" labels).
 */
function convertMessageType(typeStr: string): MessageType {
  if (echotraceTypeTable === undefined) {
    echotraceTypeTable = new Map<string, MessageType>([
      ['文本消息', MessageType.TEXT],
      ['图片消息', MessageType.IMAGE],
      ['语音消息', MessageType.VOICE],
      ['视频消息', MessageType.VIDEO],
      ['文件消息', MessageType.FILE],
      ['动画表情', MessageType.EMOJI],
      ['名片消息', MessageType.CONTACT],
      ['卡片式链接', MessageType.LINK],
      ['图文消息', MessageType.LINK],
      ['位置消息', MessageType.LOCATION],
      ['红包卡片', MessageType.RED_PACKET],
      ['转账卡片', MessageType.TRANSFER],
      ['小程序分享', MessageType.SHARE],
      ['视频号直播卡片', MessageType.SHARE],
      ['引用消息', MessageType.REPLY],
      ['聊天记录合并转发', MessageType.FORWARD],
      ['系统消息', MessageType.SYSTEM],
    ])
  }
  const mapped = echotraceTypeTable.get(typeStr)
  return mapped === undefined ? MessageType.OTHER : mapped
}
// ==================== Member tracking ====================
/**
 * Per-sender record kept while parsing; the parser stores one entry per
 * distinct `senderUsername`.
 */
interface MemberInfo {
platformId: string // sender's WeChat ID (taken from `senderUsername`)
accountName: string // display name from the latest message by this sender, or the ID when it has none
}
// ==================== Parser implementation ====================
/** Batch size used when the caller supplies none or an unusable value. */
const ECHOTRACE_DEFAULT_BATCH_SIZE = 5000

/** Number of leading bytes of the file inspected when looking for `session`. */
const ECHOTRACE_SESSION_HEAD_BYTES = 2000

/**
 * Returns a batch size that is safe to use as a loop step.
 * Values below 1 and NaN fall back to the default, because a step of 0 or less
 * would keep the batching loop from ever advancing. Fractions are floored.
 */
function normalizeEchotraceBatchSize(batchSize: number): number {
  // `!(x >= 1)` is also true for NaN.
  if (!(batchSize >= 1)) {
    return ECHOTRACE_DEFAULT_BATCH_SIZE
  }
  return Math.floor(batchSize)
}

/**
 * Locates the `"session": { ... }` object in the file head and returns its
 * JSON text, or null when the key is absent or the object is cut off.
 * Braces are matched while skipping string contents, so a `}` inside a value
 * (for example in a nickname) or a nested object does not truncate the result.
 */
function extractEchotraceSessionJson(head: string): string | null {
  const opener = /"session"\s*:\s*\{/.exec(head)
  if (!opener) {
    return null
  }
  // Index of the opening brace, which is the last character of the match.
  const start = opener.index + opener[0].length - 1
  let depth = 0
  let inString = false
  let escaped = false
  for (let i = start; i < head.length; i++) {
    const ch = head[i]
    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }
    if (ch === '"') {
      inString = true
    } else if (ch === '{') {
      depth++
    } else if (ch === '}') {
      depth--
      if (depth === 0) {
        return head.slice(start, i + 1)
      }
    }
  }
  // The head ended before the object was closed.
  return null
}

/**
 * Reads the `session` object from the beginning of the file.
 * Returns null when it cannot be found or parsed; callers then use defaults.
 */
function readEchotraceSession(filePath: string): EchotraceSession | null {
  const head = readFileHeadBytes(filePath, ECHOTRACE_SESSION_HEAD_BYTES)
  const json = extractEchotraceSessionJson(head)
  if (json === null) {
    return null
  }
  try {
    const parsed: unknown = JSON.parse(json)
    return parsed !== null && typeof parsed === 'object' ? (parsed as EchotraceSession) : null
  } catch {
    // Malformed session JSON is not fatal: fall back to defaults.
    return null
  }
}

/**
 * Decides the chat type: `session.type` wins; otherwise a non-empty `wxid`
 * that does not end with `@chatroom` means private; everything else is group.
 */
function resolveEchotraceChatType(session: EchotraceSession | null): ParsedMeta['type'] {
  if (session?.type === '私聊') {
    return ChatType.PRIVATE
  }
  if (session?.type === '群聊') {
    return ChatType.GROUP
  }
  // The value comes from untyped JSON, so verify it is a string before use.
  const wxid: unknown = session?.wxid
  if (typeof wxid === 'string' && wxid !== '' && !wxid.endsWith('@chatroom')) {
    return ChatType.PRIVATE
  }
  return ChatType.GROUP
}

/**
 * Converts one exported message to a ParsedMessage and records its sender in
 * `members` (the stored display name is overwritten by the latest one seen).
 * Returns null for entries that are not objects or that lack a sender ID or
 * a creation time.
 */
function toEchotraceParsedMessage(
  msg: EchotraceMessage | null | undefined,
  members: Map<string, MemberInfo>
): ParsedMessage | null {
  if (!msg || typeof msg !== 'object') {
    return null
  }
  // `== null` rejects both a missing and an explicit-null timestamp.
  if (!msg.senderUsername || msg.createTime == null) {
    return null
  }
  const platformId = msg.senderUsername
  const accountName = msg.senderDisplayName || platformId
  const known = members.get(platformId)
  if (known) {
    known.accountName = accountName
  } else {
    members.set(platformId, { platformId, accountName })
  }
  return {
    senderPlatformId: platformId,
    senderAccountName: accountName,
    // The echotrace format has no separate group-nickname field.
    senderGroupNickname: undefined,
    timestamp: msg.createTime,
    type: convertMessageType(msg.type),
    content: msg.content || null,
  }
}

/**
 * Parses an echotrace export and yields parse events in this order:
 * an initial `progress`, `meta`, `members`, zero or more `messages` batches,
 * a final `progress`, then `done`.
 *
 * `options.onProgress`, when given, is additionally called with the initial
 * and final progress and once every `batchSize` accepted messages while the
 * file is being streamed.
 *
 * @param options - `filePath` of the export, optional `batchSize` (default
 *   5000; values below 1 or NaN fall back to the default) and optional
 *   `onProgress` callback.
 * @throws Rejects with the stream/parser error if reading or JSON parsing fails.
 */
async function* parseEchotrace(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
  const { filePath, batchSize: requestedBatchSize = ECHOTRACE_DEFAULT_BATCH_SIZE, onProgress } = options
  const batchSize = normalizeEchotraceBatchSize(requestedBatchSize)
  const totalBytes = getFileSize(filePath)
  let bytesRead = 0
  let messagesProcessed = 0

  // Initial progress
  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...')
  yield { type: 'progress', data: initialProgress }
  onProgress?.(initialProgress)

  // Session info comes from the file head; name falls back to the file name.
  const session = readEchotraceSession(filePath)
  const meta: ParsedMeta = {
    name: session?.displayName || session?.nickname || extractNameFromFilePath(filePath),
    platform: ChatPlatform.WECHAT,
    type: resolveEchotraceChatType(session),
  }
  yield { type: 'meta', data: meta }

  // Messages are buffered in memory because the member list, which is only
  // complete once the whole file has been read, is emitted before any batch.
  const memberMap = new Map<string, MemberInfo>()
  const parsedMessages: ParsedMessage[] = []

  await new Promise<void>((resolve, reject) => {
    const readStream = fs.createReadStream(filePath, { encoding: 'utf-8' })
    // Release the file handle on any failure. Registered on both the read
    // stream and the pipeline so a read error rejects even if the chain does
    // not re-emit it; a second reject() call is a no-op.
    const fail = (error: unknown): void => {
      readStream.destroy()
      reject(error)
    }
    readStream.on('data', (chunk: string | Buffer) => {
      bytesRead += typeof chunk === 'string' ? Buffer.byteLength(chunk) : chunk.length
    })
    readStream.on('error', fail)

    const pipeline = chain([readStream, parser(), pick({ filter: /^messages\.\d+$/ }), streamValues()])
    pipeline.on('data', ({ value }: { value: EchotraceMessage }) => {
      const parsed = toEchotraceParsedMessage(value, memberMap)
      if (!parsed) {
        return
      }
      parsedMessages.push(parsed)
      messagesProcessed++
      // Report progress once per full batch of accepted messages.
      if (messagesProcessed % batchSize === 0) {
        const progress = createProgress(
          'parsing',
          bytesRead,
          totalBytes,
          messagesProcessed,
          `已处理 ${messagesProcessed} 条消息...`
        )
        onProgress?.(progress)
      }
    })
    pipeline.on('end', () => resolve())
    pipeline.on('error', fail)
  })

  // Members
  const members: ParsedMember[] = Array.from(memberMap.values(), (m) => ({
    platformId: m.platformId,
    accountName: m.accountName,
  }))
  yield { type: 'members', data: members }

  // Messages, in batches
  for (let start = 0; start < parsedMessages.length; start += batchSize) {
    yield { type: 'messages', data: parsedMessages.slice(start, start + batchSize) }
  }

  // Done
  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
  yield { type: 'progress', data: doneProgress }
  onProgress?.(doneProgress)
  yield {
    type: 'done',
    data: { messageCount: messagesProcessed, memberCount: memberMap.size },
  }
}
// ==================== Exported parser ====================
/** Parser entry for this format: pairs the `feature` descriptor with the `parseEchotrace` generator. */
export const parser_: Parser = {
feature,
parse: parseEchotrace,
}
// ==================== Preprocessor (reserved) ====================
import { echotracePreprocessor } from './echotrace-preprocessor'
/** Re-export of the preprocessor implemented in `./echotrace-preprocessor`. */
export const preprocessor = echotracePreprocessor
// ==================== Exported format module ====================
/** Default export bundling this format's feature descriptor, parser and preprocessor. */
const module_: FormatModule = {
feature,
parser: parser_,
preprocessor: echotracePreprocessor,
}
export default module_