Files
ChatLab/electron/main/parser/formats/chatlab.ts
2025-12-21 17:20:06 +08:00

273 lines
7.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* ChatLab JSON 格式
* ChatLab 专属的统一格式(通过 chatlab 对象识别)
*
* 特征:
* - 文件头包含 "chatlab" 字段
* - 有 version 版本号
* - 消息结构已经是标准化的
*/
import * as fs from 'fs'
import { parser } from 'stream-json'
import { pick } from 'stream-json/filters/Pick'
import { streamValues } from 'stream-json/streamers/StreamValues'
import { chain } from 'stream-chain'
import { KNOWN_PLATFORMS, ChatType } from '../../../../src/types/base'
import type {
FormatFeature,
FormatModule,
Parser,
ParseOptions,
ParseEvent,
ParsedMeta,
ParsedMember,
ParsedMessage,
} from '../types'
import { getFileSize, createProgress, readFileHeadBytes } from '../utils'
import * as path from 'path'
// ==================== 辅助函数 ====================
/**
 * Derives a fallback chat name from a file path.
 *
 * @param filePath - Path of the imported file.
 * @returns The file's base name with a trailing `.json` extension (any letter
 *   case) stripped, or the default placeholder name when nothing remains.
 */
function extractNameFromFilePath(filePath: string): string {
  // Only a trailing ".json" is stripped; other extensions are kept as part of the name.
  const stem = path.basename(filePath).replace(/\.json$/i, '')
  if (stem === '') {
    return '未知群聊'
  }
  return stem
}
// ==================== 特征定义 ====================
/**
 * Format descriptor for ChatLab JSON files.
 *
 * NOTE(review): how `priority`, `extensions` and `signatures` are evaluated is
 * decided by the format detector, which is not part of this file — confirm there.
 */
export const feature: FormatFeature = {
id: 'chatlab',
name: 'ChatLab JSON',
platform: KNOWN_PLATFORMS.UNKNOWN, // A ChatLab file may contain data from multiple platforms
priority: 1, // Highest priority (per the original author's note)
extensions: ['.json'],
signatures: {
// Patterns looked for near the start of the file: a "chatlab" object and a string "version" value
head: [/"chatlab"\s*:\s*\{/, /"version"\s*:\s*"/],
// Top-level keys a ChatLab file is expected to contain
requiredFields: ['chatlab', 'meta', 'messages'],
},
}
// ==================== 消息结构 ====================
/** Shape of one entry of the top-level `messages` array as consumed by this parser. */
interface ChatLabMessage {
sender: string // platformId of the sender
accountName: string // Account name at the time the message was sent
groupNickname?: string // Group nickname at the time the message was sent
timestamp: number // Timestamp in seconds
type: number // MessageType
content: string | null
}
/** Shape of one entry of the `members` array as consumed by this parser. */
interface ChatLabMember {
platformId: string
accountName: string // Account name
groupNickname?: string // Group nickname
aliases?: string[]
avatar?: string // Avatar (base64 Data URL)
}
// ==================== 解析器实现 ====================
/**
 * Extracts the balanced JSON object or array literal that begins at `startIndex`.
 *
 * Brackets that appear inside JSON string literals (including strings that
 * contain escaped quotes) are ignored, so values such as `"name": "a}b"` or
 * nested arrays do not terminate the scan early.
 *
 * @param text - Text to scan; may be a truncated prefix of a larger document.
 * @param startIndex - Index of the opening `{` or `[`.
 * @returns The literal including both delimiters, or `null` when `startIndex`
 *   does not point at an opening bracket or the matching closing bracket is
 *   not contained in `text`.
 */
function extractBalancedJsonValue(text: string, startIndex: number): string | null {
  const opener = text[startIndex]
  if (opener !== '{' && opener !== '[') {
    return null
  }
  let depth = 0
  let inString = false
  let escaped = false
  for (let i = startIndex; i < text.length; i++) {
    const ch = text[i]
    if (inString) {
      if (escaped) {
        // The character after a backslash never closes the string.
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }
    if (ch === '"') {
      inString = true
    } else if (ch === '{' || ch === '[') {
      depth++
    } else if (ch === '}' || ch === ']') {
      depth--
      if (depth === 0) {
        return text.slice(startIndex, i + 1)
      }
    }
  }
  return null
}

/**
 * Parses a ChatLab JSON file and emits parse events.
 *
 * Events are yielded in this order: an initial `progress`, `meta`, `members`
 * (only when at least one member is known), zero or more `messages` batches,
 * a final `progress`, and `done`.
 *
 * `meta` and `members` are read from the first 200000 bytes of the file on a
 * best-effort basis. When `members` cannot be read there, the member list is
 * rebuilt from the senders of the streamed messages. Messages are streamed
 * from the `messages` array but are all buffered in memory before any
 * `messages` batch is yielded.
 *
 * @param options - `filePath` of the file to parse, optional `batchSize`
 *   (messages per batch; values that are not a finite number >= 1 fall back to
 *   5000) and optional `onProgress` callback.
 * @throws Rejects with the stream/parser error when reading or JSON-parsing
 *   the file fails.
 */
async function* parseChatLab(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
  const { filePath, onProgress } = options
  // A batch size of 0 would make the batching loop below spin forever and NaN
  // would drop every message, so unusable values fall back to the default.
  const requestedBatchSize = options.batchSize ?? 5000
  const batchSize =
    Number.isFinite(requestedBatchSize) && requestedBatchSize >= 1 ? Math.floor(requestedBatchSize) : 5000
  const totalBytes = getFileSize(filePath)
  let bytesRead = 0
  let messagesProcessed = 0

  // Report the initial progress both as an event and through the callback.
  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...')
  yield { type: 'progress', data: initialProgress }
  onProgress?.(initialProgress)

  // Read the head of the file to obtain meta and members.
  const headContent = readFileHeadBytes(filePath, 200000)

  // Parse meta, starting from defaults that are kept when parsing fails.
  let meta: ParsedMeta = {
    name: '未知群聊',
    platform: KNOWN_PLATFORMS.UNKNOWN,
    type: ChatType.GROUP,
  }
  try {
    // meta may contain nested objects (e.g. a sources array), so the object is
    // located by balanced-bracket scanning rather than by a single regex.
    const metaStartMatch = headContent.match(/"meta"\s*:\s*\{/)
    if (metaStartMatch && metaStartMatch.index !== undefined) {
      const braceIndex = metaStartMatch.index + metaStartMatch[0].length - 1 // points at '{'
      const metaJson = extractBalancedJsonValue(headContent, braceIndex)
      if (metaJson !== null) {
        const metaObj = JSON.parse(metaJson)
        meta = {
          name: metaObj.name || '未知群聊',
          platform: metaObj.platform || KNOWN_PLATFORMS.UNKNOWN,
          type: (metaObj.type as ChatType) || ChatType.GROUP,
          groupId: metaObj.groupId,
          groupAvatar: metaObj.groupAvatar,
        }
      }
    }
  } catch {
    // Best effort: keep the defaults.
  }

  // If the name is still the default, fall back to the file name.
  if (meta.name === '未知群聊') {
    meta.name = extractNameFromFilePath(filePath)
  }
  yield { type: 'meta', data: meta }

  // Parse members when the complete array is present in the file head.
  let members: ParsedMember[] = []
  try {
    const membersStartMatch = headContent.match(/"members"\s*:\s*\[/)
    if (membersStartMatch && membersStartMatch.index !== undefined) {
      const bracketIndex = membersStartMatch.index + membersStartMatch[0].length - 1 // points at '['
      // Balanced scanning keeps nested arrays (e.g. a member's aliases) inside the match.
      const membersJson = extractBalancedJsonValue(headContent, bracketIndex)
      if (membersJson !== null) {
        const parsed: unknown = JSON.parse(membersJson)
        if (Array.isArray(parsed)) {
          // Build into a local list first so that a malformed entry cannot
          // leave a partial member list that would suppress the fallback.
          const fromHead: ParsedMember[] = []
          for (const m of parsed as ChatLabMember[]) {
            fromHead.push({
              platformId: m.platformId,
              accountName: m.accountName,
              groupNickname: m.groupNickname,
              avatar: m.avatar,
            })
          }
          members = fromHead
        }
      }
    }
  } catch {
    // members may be too large for the head; they are collected from messages instead.
  }

  // Members seen as message senders (used only when none were read from the head)
  // and every parsed message.
  const memberMapFromMessages = new Map<string, ParsedMember>()
  const allMessages: ParsedMessage[] = []

  // Stream-parse the messages array.
  await new Promise<void>((resolve, reject) => {
    const readStream = fs.createReadStream(filePath, { encoding: 'utf-8' })
    // Track consumed bytes for progress reporting.
    readStream.on('data', (chunk: string | Buffer) => {
      bytesRead += typeof chunk === 'string' ? Buffer.byteLength(chunk) : chunk.length
    })
    const pipeline = chain([readStream, parser(), pick({ filter: /^messages\.\d+$/ }), streamValues()])

    pipeline.on('data', ({ value }: { value: ChatLabMessage }) => {
      const msg = value
      // No members from the head: collect them from message senders.
      if (members.length === 0) {
        memberMapFromMessages.set(msg.sender, {
          platformId: msg.sender,
          accountName: msg.accountName,
          groupNickname: msg.groupNickname,
        })
      }
      allMessages.push({
        senderPlatformId: msg.sender,
        senderAccountName: msg.accountName,
        senderGroupNickname: msg.groupNickname,
        timestamp: msg.timestamp,
        type: msg.type,
        content: msg.content,
      })
      messagesProcessed++
      // Notify the callback once per completed batch.
      if (messagesProcessed % batchSize === 0) {
        const progress = createProgress(
          'parsing',
          bytesRead,
          totalBytes,
          messagesProcessed,
          `已处理 ${messagesProcessed} 条消息...`
        )
        onProgress?.(progress)
      }
    })
    pipeline.on('end', () => {
      resolve()
    })
    pipeline.on('error', (error: Error) => {
      // Stop reading and release the file handle once parsing has failed.
      readStream.destroy()
      reject(error)
    })
  })

  // Emit members: prefer those from the head, otherwise those seen in messages.
  if (members.length > 0) {
    yield { type: 'members', data: members }
  } else if (memberMapFromMessages.size > 0) {
    yield { type: 'members', data: Array.from(memberMapFromMessages.values()) }
  }

  // Emit messages in batches of at most batchSize.
  for (let i = 0; i < allMessages.length; i += batchSize) {
    const batch = allMessages.slice(i, i + batchSize)
    yield { type: 'messages', data: batch }
  }

  // Done.
  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
  yield { type: 'progress', data: doneProgress }
  onProgress?.(doneProgress)
  yield {
    type: 'done',
    data: {
      messageCount: messagesProcessed,
      memberCount: members.length > 0 ? members.length : memberMapFromMessages.size,
    },
  }
}
// ==================== 导出解析器 ====================
/** Parser for the ChatLab JSON format: pairs the format descriptor with the `parseChatLab` generator. */
export const parser_: Parser = {
feature,
parse: parseChatLab,
}
// ==================== 导出格式模块 ====================
/** Format module (descriptor plus parser) exposed as this file's default export. */
const module_: FormatModule = {
feature,
parser: parser_,
}
export default module_