mirror of
https://github.com/hellodigua/ChatLab.git
synced 2026-05-11 00:21:36 +08:00
feat: 重构导入逻辑
This commit is contained in:
@@ -78,7 +78,8 @@ function createDatabase(sessionId: string): Database.Database {
|
||||
CREATE TABLE IF NOT EXISTS member (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
platform_id TEXT NOT NULL UNIQUE,
|
||||
name TEXT NOT NULL
|
||||
name TEXT NOT NULL,
|
||||
nickname TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS member_name_history (
|
||||
@@ -148,7 +149,7 @@ export function importData(parseResult: ParseResult): string {
|
||||
)
|
||||
|
||||
const insertMember = db.prepare(`
|
||||
INSERT OR IGNORE INTO member (platform_id, name) VALUES (?, ?)
|
||||
INSERT OR IGNORE INTO member (platform_id, name, nickname) VALUES (?, ?, ?)
|
||||
`)
|
||||
const getMemberId = db.prepare(`
|
||||
SELECT id FROM member WHERE platform_id = ?
|
||||
@@ -157,7 +158,7 @@ export function importData(parseResult: ParseResult): string {
|
||||
const memberIdMap = new Map<string, number>()
|
||||
|
||||
for (const member of parseResult.members) {
|
||||
insertMember.run(member.platformId, member.name)
|
||||
insertMember.run(member.platformId, member.name, member.nickname || null)
|
||||
const row = getMemberId.get(member.platformId) as { id: number }
|
||||
memberIdMap.set(member.platformId, row.id)
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import type { FormatModule } from '../types'
|
||||
// 导入所有格式模块
|
||||
import chatlab from './chatlab'
|
||||
import shuakamiQqExporterV4 from './shuakami-qq-exporter-v4'
|
||||
import shuakamiQqExporterLegacy from './shuakami-qq-exporter-legacy'
|
||||
import qqNativeTxt from './qq-native-txt'
|
||||
|
||||
/**
|
||||
* 所有支持的格式模块(按优先级排序)
|
||||
@@ -16,8 +16,8 @@ import shuakamiQqExporterLegacy from './shuakami-qq-exporter-legacy'
|
||||
export const formats: FormatModule[] = [
|
||||
chatlab, // 优先级 1
|
||||
shuakamiQqExporterV4, // 优先级 10 - shuakami/qq-chat-exporter V4
|
||||
shuakamiQqExporterLegacy, // 优先级 20 - shuakami/qq-chat-exporter Legacy
|
||||
qqNativeTxt, // 优先级 30 - QQ 官方导出 TXT
|
||||
]
|
||||
|
||||
// 按名称导出,方便单独使用
|
||||
export { chatlab, shuakamiQqExporterV4, shuakamiQqExporterLegacy }
|
||||
export { chatlab, shuakamiQqExporterV4, qqNativeTxt }
|
||||
|
||||
@@ -0,0 +1,278 @@
|
||||
/**
|
||||
* QQ 官方导出 TXT 格式解析器
|
||||
* 适配 QQ 群聊旧版导出功能
|
||||
*
|
||||
* 格式特征:
|
||||
* - 文件头:消息记录(此消息记录为文本格式,不支持重新导入)
|
||||
* - 群名:消息对象:xxx
|
||||
* - 消息格式:时间 昵称(QQ号) 或 时间 昵称<邮箱>
|
||||
* - 内容在消息头下一行,可能跨多行
|
||||
*
|
||||
* 字段映射:
|
||||
* - name: platformId(用户ID)
|
||||
* - nickname: 群昵称
|
||||
* - senderName: 群昵称(用于昵称历史追踪)
|
||||
*/
|
||||
|
||||
import * as fs from 'fs'
|
||||
import * as readline from 'readline'
|
||||
import { ChatPlatform, ChatType, MessageType } from '../../../../src/types/chat'
|
||||
import type {
|
||||
FormatFeature,
|
||||
FormatModule,
|
||||
Parser,
|
||||
ParseOptions,
|
||||
ParseEvent,
|
||||
ParsedMeta,
|
||||
ParsedMember,
|
||||
ParsedMessage,
|
||||
} from '../types'
|
||||
import { getFileSize, createProgress } from '../utils'
|
||||
|
||||
// ==================== 特征定义 ====================
|
||||
|
||||
/**
 * Format descriptor for the QQ native TXT export.
 * Declares the module id, display name, platform, priority value, the accepted
 * file extension and the regular expressions listed as head signatures.
 */
export const feature: FormatFeature = {
  id: 'qq-native-txt',
  name: 'QQ 官方导出 (TXT)',
  platform: ChatPlatform.QQ,
  priority: 30,
  extensions: ['.txt'],
  signatures: {
    head: [/消息记录(此消息记录为文本格式/, /消息对象:/],
  },
}
|
||||
|
||||
// ==================== 消息头正则 ====================
|
||||
|
||||
// Message header line, e.g. `2019-07-16 18:15:05 夜喵大人🐱(642163903)`
// or `2019-07-16 18:15:11 铛🔔<ppbaozi@gmail.com>`.
// Capture groups: 1 = timestamp, 2 = display name, 3 = id inside (...), 4 = address inside <...>.
// Exactly one of groups 3 and 4 is set on a match.
const MESSAGE_HEADER_REGEX = /^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (.+?)(?:\(([^)]+)\)|<([^>]+)>)$/

// Group-name line, e.g. `消息对象:杭州FE`; capture group 1 is the name.
const GROUP_NAME_REGEX = /^消息对象:(.+)$/
|
||||
|
||||
// ==================== 消息类型判断 ====================
|
||||
|
||||
/**
 * Classifies a message body by the placeholder text found in the export.
 *
 * The body is trimmed first. The five known placeholders map to their media
 * types; any other text that starts with `[` and ends with `]` is reported as
 * OTHER; everything else (including an empty body) is TEXT.
 *
 * @param content - Raw message body.
 * @returns The detected message type.
 */
function detectMessageType(content: string): MessageType {
  const body = content.trim()

  switch (body) {
    case '[图片]':
      return MessageType.IMAGE
    case '[表情]':
      return MessageType.EMOJI
    case '[语音]':
      return MessageType.VOICE
    case '[视频]':
      return MessageType.VIDEO
    case '[文件]':
      return MessageType.FILE
  }

  // Unknown bracketed placeholder vs. ordinary text.
  const isBracketed = body.startsWith('[') && body.endsWith(']')
  return isBracketed ? MessageType.OTHER : MessageType.TEXT
}
|
||||
|
||||
// ==================== 时间解析 ====================
|
||||
|
||||
/**
 * Converts a `YYYY-MM-DD HH:mm:ss` string into a Unix timestamp in seconds.
 *
 * The first space is replaced with `T` and the result is handed to `Date`,
 * which interprets an offset-less date-time string in the local time zone.
 *
 * @param timeStr - Time text such as `2019-07-16 18:15:05`.
 * @returns Seconds since the epoch, or NaN when `Date` cannot parse the text.
 */
function parseLocalTime(timeStr: string): number {
  const isoLike = timeStr.replace(' ', 'T')
  const millis = new Date(isoLike).getTime()
  return Math.floor(millis / 1000)
}
|
||||
|
||||
// ==================== 昵称清理 ====================
|
||||
|
||||
/**
 * Removes leading `【...】` tags from a nickname.
 * Example: `【管理员】张三` -> `张三`. Several consecutive tags are all removed.
 *
 * If stripping the tags would leave nothing (the nickname consists only of
 * tags), the trimmed original is returned instead, so a sender never ends up
 * with an empty display name.
 *
 * @param nickname - Nickname text as it appears in the message header.
 * @returns The cleaned, trimmed nickname.
 */
function cleanNickname(nickname: string): string {
  // One or more `【...】` groups at the very start, each optionally followed by whitespace.
  const stripped = nickname.replace(/^(【[^】]*】\s*)+/, '').trim()
  return stripped !== '' ? stripped : nickname.trim()
}
|
||||
|
||||
// ==================== 成员信息 ====================
|
||||
|
||||
/** Per-sender record collected while parsing. */
interface MemberInfo {
  /** Sender identifier taken from the message header. */
  platformId: string
  /** Group nickname. */
  nickname: string
}

// Most recent nickname seen per platformId that differed from the id itself.
// Declared at module level, so the same map is visible to every parse call.
const lastValidNickname = new Map<string, string>()
|
||||
|
||||
// ==================== 解析器实现 ====================
|
||||
|
||||
/**
 * Parses a QQ native TXT export and emits the result as a sequence of parse events.
 *
 * Event order: an initial `progress`, then `meta`, `members`, zero or more
 * `messages` batches, a final `progress` and `done`. The file is read line by
 * line, but every message is held in memory until the whole file has been
 * read, because the member list is emitted before the first message batch.
 *
 * Line handling:
 * - a line matching GROUP_NAME_REGEX sets the group name;
 * - a line matching MESSAGE_HEADER_REGEX closes the previous message and opens a new one;
 * - any other line is appended to the open message, except separator lines
 *   (`=====`) and file-header lines (`消息记录` / `消息分组`), which are dropped.
 *
 * @param options - `filePath` of the export, optional `batchSize` (default 5000;
 *   a value that is not >= 1 falls back to 5000) and optional `onProgress` callback.
 * @returns An async generator of parse events.
 */
async function* parseTxt(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
  const { filePath, batchSize = 5000, onProgress } = options
  // A zero, negative or NaN batch size would make the batching loop below never advance.
  const chunkSize = batchSize >= 1 ? batchSize : 5000

  const totalBytes = getFileSize(filePath)
  let bytesRead = 0
  let messagesProcessed = 0

  // The nickname memory lives at module level; start every parse from a clean
  // slate so names remembered from a previously parsed file are not reused here.
  lastValidNickname.clear()

  // Initial progress.
  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...')
  yield { type: 'progress', data: initialProgress }
  onProgress?.(initialProgress)

  // Collected data.
  let groupName = '未知群聊'
  const memberMap = new Map<string, MemberInfo>()
  const messages: ParsedMessage[] = []

  // Message currently being assembled (header seen, body lines still arriving).
  let currentMessage: {
    timestamp: number
    platformId: string
    nickname: string
    contentLines: string[]
  } | null = null

  // Finalizes the open message, if any: stores it and records the sender.
  const saveCurrentMessage = () => {
    if (!currentMessage) return

    const content = currentMessage.contentLines.join('\n').trim()

    messages.push({
      senderPlatformId: currentMessage.platformId,
      senderName: currentMessage.nickname, // used for nickname history tracking
      timestamp: currentMessage.timestamp,
      type: detectMessageType(content),
      content: content || null,
    })

    // Later messages overwrite earlier ones, so the newest nickname wins.
    memberMap.set(currentMessage.platformId, {
      platformId: currentMessage.platformId,
      nickname: currentMessage.nickname,
    })

    messagesProcessed++
  }

  // Read the file line by line.
  const fileStream = fs.createReadStream(filePath, { encoding: 'utf-8' })
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity,
  })

  fileStream.on('data', (chunk: string | Buffer) => {
    bytesRead += typeof chunk === 'string' ? Buffer.byteLength(chunk) : chunk.length
  })

  try {
    for await (const line of rl) {
      // Group name line.
      const groupMatch = line.match(GROUP_NAME_REGEX)
      if (groupMatch) {
        groupName = groupMatch[1].trim()
        continue
      }

      // Message header line.
      const headerMatch = line.match(MESSAGE_HEADER_REGEX)
      if (headerMatch) {
        saveCurrentMessage()

        const timeStr = headerMatch[1]
        const rawNickname = headerMatch[2].trim()
        let nickname = cleanNickname(rawNickname)
        const platformId = headerMatch[3] || headerMatch[4] // (id) or <email>

        if (nickname === platformId) {
          // The export shows the id in place of the name; reuse the last real
          // nickname for this sender when one is known, otherwise keep the id.
          const previousNickname = lastValidNickname.get(platformId)
          if (previousNickname) {
            nickname = previousNickname
          }
        } else {
          lastValidNickname.set(platformId, nickname)
        }

        currentMessage = {
          timestamp: parseLocalTime(timeStr),
          platformId,
          nickname,
          contentLines: [],
        }

        // Report progress through the callback every 1000 stored messages.
        if (messagesProcessed % 1000 === 0) {
          const progress = createProgress(
            'parsing',
            bytesRead,
            totalBytes,
            messagesProcessed,
            `已处理 ${messagesProcessed} 条消息...`
          )
          onProgress?.(progress)
        }

        continue
      }

      // Body line: belongs to the open message, if there is one.
      if (currentMessage) {
        // Separator lines.
        if (line.startsWith('=====')) continue
        // File-header lines.
        if (line.startsWith('消息记录') || line.startsWith('消息分组')) continue

        currentMessage.contentLines.push(line)
      }
    }
  } finally {
    // Release the file handle even when the loop exits through an exception.
    rl.close()
    fileStream.destroy()
  }

  // The last message has no following header to close it.
  saveCurrentMessage()

  // Meta.
  const meta: ParsedMeta = {
    name: groupName,
    platform: ChatPlatform.QQ,
    type: ChatType.GROUP,
  }
  yield { type: 'meta', data: meta }

  // Members: `name` carries the platform id, `nickname` carries the group nickname.
  const members: ParsedMember[] = Array.from(memberMap.values()).map((m) => ({
    platformId: m.platformId,
    name: m.platformId,
    nickname: m.nickname,
  }))
  yield { type: 'members', data: members }

  // Messages, in batches.
  for (let i = 0; i < messages.length; i += chunkSize) {
    yield { type: 'messages', data: messages.slice(i, i + chunkSize) }
  }

  // Completion.
  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
  yield { type: 'progress', data: doneProgress }
  onProgress?.(doneProgress)

  yield {
    type: 'done',
    data: { messageCount: messagesProcessed, memberCount: memberMap.size },
  }
}
|
||||
|
||||
// ==================== 导出解析器 ====================
|
||||
|
||||
/** Parser entry: pairs the format descriptor with the streaming parse function. */
export const parser_: Parser = {
  feature,
  parse: parseTxt,
}

// ==================== Format module export ====================

/** Format module for this file; the TXT format attaches no preprocessor. */
const module_: FormatModule = {
  feature,
  parser: parser_,
}

export default module_
|
||||
|
||||
@@ -1,271 +0,0 @@
|
||||
/**
|
||||
* shuakami/qq-chat-exporter Legacy 格式解析器
|
||||
* 适配项目: https://github.com/shuakami/qq-chat-exporter
|
||||
* 版本: V1-V3(早期版本)
|
||||
*
|
||||
* 特征:
|
||||
* - 时间戳使用毫秒数
|
||||
* - 没有 metadata.version 或版本号小于 4
|
||||
* - sender 中主要使用 uin 字段
|
||||
*
|
||||
* 注意:此解析器仅适配 shuakami/qq-chat-exporter 项目导出的格式,
|
||||
* 其他 QQ 聊天记录导出工具可能需要创建独立的解析器。
|
||||
*/
|
||||
|
||||
import * as fs from 'fs'
|
||||
import { parser } from 'stream-json'
|
||||
import { pick } from 'stream-json/filters/Pick'
|
||||
import { streamValues } from 'stream-json/streamers/StreamValues'
|
||||
import { chain } from 'stream-chain'
|
||||
import { ChatPlatform, ChatType, MessageType } from '../../../../src/types/chat'
|
||||
import type {
|
||||
FormatFeature,
|
||||
FormatModule,
|
||||
Parser,
|
||||
ParseOptions,
|
||||
ParseEvent,
|
||||
ParsedMeta,
|
||||
ParsedMember,
|
||||
ParsedMessage,
|
||||
} from '../types'
|
||||
import { getFileSize, createProgress, readFileHeadBytes, parseTimestamp, isValidYear } from '../utils'
|
||||
|
||||
// ==================== 特征定义 ====================
|
||||
|
||||
/**
 * Format descriptor for the shuakami/qq-chat-exporter legacy JSON export.
 * Declares the module id, display name, platform, priority value, the accepted
 * file extension, the head signature patterns and the required top-level fields.
 */
export const feature: FormatFeature = {
  id: 'shuakami-qq-exporter-legacy',
  name: 'shuakami/qq-chat-exporter (Legacy)',
  platform: ChatPlatform.QQ,
  priority: 20, // ranks below V4
  extensions: ['.json'],
  signatures: {
    head: [/QQChatExporter/, /"chatInfo"/],
    requiredFields: ['chatInfo', 'messages'],
  },
}
|
||||
|
||||
// ==================== 消息结构 ====================
|
||||
|
||||
/** Shape of one entry of the `messages` array as read by this parser. */
interface LegacyMessage {
  id?: string
  /** Millisecond timestamp. */
  timestamp: number
  sender: {
    uid?: string
    uin: string
    name: string
  }
  type?: string
  system?: boolean
  recalled?: boolean
  content: {
    text: string
    html?: string
    resources?: Array<{ type: string }>
    elements?: Array<{ type: string }>
  }
}
|
||||
|
||||
// ==================== 消息类型转换 ====================
|
||||
|
||||
/**
 * Maps a legacy export message onto a MessageType.
 *
 * Checks run in this order and the first hit wins:
 * 1. the type of the first entry in `content.resources`;
 * 2. any `market_face` / `face` entry in `content.elements`;
 * 3. the `type_N` string of the message.
 * Anything unrecognised is treated as TEXT.
 *
 * @param qqType - The message's `type` string, if present.
 * @param content - The message's content object.
 * @returns The resolved message type.
 */
function convertMessageType(qqType: string | undefined, content: LegacyMessage['content']): MessageType {
  // 1. First attached resource.
  if (content.resources && content.resources.length > 0) {
    const kind = content.resources[0].type
    if (kind === 'image') return MessageType.IMAGE
    if (kind === 'video') return MessageType.VIDEO
    if (kind === 'voice' || kind === 'audio') return MessageType.VOICE
    if (kind === 'file') return MessageType.FILE
  }

  // 2. Emoji-like elements.
  if (content.elements) {
    const hasFace = content.elements.some((elem) => elem.type === 'market_face' || elem.type === 'face')
    if (hasFace) return MessageType.EMOJI
  }

  // 3. Type string; `type_1` and every unknown value are plain text.
  if (qqType === 'type_17') return MessageType.EMOJI
  if (qqType === 'type_3') return MessageType.IMAGE
  if (qqType === 'type_7') return MessageType.VOICE
  return MessageType.TEXT
}
|
||||
|
||||
// ==================== 解析器实现 ====================
|
||||
|
||||
/**
 * Parses a shuakami/qq-chat-exporter legacy JSON export and emits parse events.
 *
 * Event order: an initial `progress`, then `meta`, `members`, zero or more
 * `messages` batches, a final `progress` and `done`. The `messages` array of
 * the file is streamed entry by entry, but the converted messages are held in
 * memory until the stream ends, because members are emitted before messages.
 *
 * Entries are skipped when they carry no sender id, or when their timestamp
 * cannot be parsed or fails the year check.
 *
 * @param options - `filePath` of the export, optional `batchSize` (default 5000;
 *   a value that is not >= 1 falls back to 5000) and optional `onProgress` callback.
 * @returns An async generator of parse events.
 */
async function* parseLegacy(options: ParseOptions): AsyncGenerator<ParseEvent, void, unknown> {
  const { filePath, batchSize = 5000, onProgress } = options
  // A zero, negative or NaN batch size would make the batching loop below never advance.
  const chunkSize = batchSize >= 1 ? batchSize : 5000

  const totalBytes = getFileSize(filePath)
  let bytesRead = 0
  let messagesProcessed = 0

  // Initial progress.
  const initialProgress = createProgress('parsing', 0, totalBytes, 0, '开始解析...')
  yield { type: 'progress', data: initialProgress }
  onProgress?.(initialProgress)

  // Read the head of the file to extract chat info for the meta event.
  const headContent = readFileHeadBytes(filePath, 100000)

  let chatName = '未知群聊'
  let isGroup = true
  try {
    // Only a flat `{...}` object is captured; anything else keeps the defaults.
    const chatInfoMatch = headContent.match(/"chatInfo"\s*:\s*(\{[^}]+\})/)
    if (chatInfoMatch) {
      const parsed: unknown = JSON.parse(chatInfoMatch[1])
      if (parsed !== null && typeof parsed === 'object') {
        const info = parsed as { name?: unknown; type?: unknown }
        // Keep the default name when the file provides none, so meta.name is always a string.
        if (typeof info.name === 'string' && info.name !== '') chatName = info.name
        isGroup = info.type === 'group'
      }
    }
  } catch {
    // Best effort: malformed chat info leaves the defaults in place.
  }

  // Meta.
  const meta: ParsedMeta = {
    name: chatName,
    platform: ChatPlatform.QQ,
    type: isGroup ? ChatType.GROUP : ChatType.PRIVATE,
  }
  yield { type: 'meta', data: meta }

  // Collected members and messages.
  const memberMap = new Map<string, ParsedMember>()
  const collected: ParsedMessage[] = []

  // Converts one raw entry; returns null when the entry must be skipped.
  const toParsedMessage = (msg: LegacyMessage): ParsedMessage | null => {
    // Entries without a sender object are skipped instead of throwing inside the stream handler.
    const sender = msg?.sender
    const platformId = sender?.uin || sender?.uid
    if (!sender || !platformId) return null

    const senderName = sender.name || platformId

    // Later entries overwrite earlier ones, so the newest name wins.
    memberMap.set(platformId, { platformId, name: senderName })

    // Timestamp (milliseconds in this format).
    const timestamp = parseTimestamp(msg.timestamp)
    if (timestamp === null || !isValidYear(timestamp)) return null

    // A missing content object is treated as empty text content.
    const type = msg.system ? MessageType.SYSTEM : convertMessageType(msg.type, msg.content ?? { text: '' })

    let textContent = msg.content?.text || ''
    if (msg.recalled) {
      textContent = '[已撤回] ' + textContent
    }

    return {
      senderPlatformId: platformId,
      senderName,
      timestamp,
      type,
      content: textContent || null,
    }
  }

  // Stream the `messages` array.
  await new Promise<void>((resolve, reject) => {
    const readStream = fs.createReadStream(filePath, { encoding: 'utf-8' })

    readStream.on('data', (chunk: string | Buffer) => {
      bytesRead += typeof chunk === 'string' ? Buffer.byteLength(chunk) : chunk.length
    })

    const pipeline = chain([readStream, parser(), pick({ filter: /^messages\.\d+$/ }), streamValues()])

    // Number of messages stored since the last progress callback.
    let sinceLastProgress = 0

    pipeline.on('data', ({ value }: { value: LegacyMessage }) => {
      const parsed = toParsedMessage(value)
      if (!parsed) return

      collected.push(parsed)
      messagesProcessed++
      sinceLastProgress++

      // Report progress through the callback once per batch worth of messages.
      if (sinceLastProgress >= chunkSize) {
        sinceLastProgress = 0
        const progress = createProgress(
          'parsing',
          bytesRead,
          totalBytes,
          messagesProcessed,
          `已处理 ${messagesProcessed} 条消息...`
        )
        onProgress?.(progress)
      }
    })

    pipeline.on('end', () => resolve())
    pipeline.on('error', reject)
  })

  // Members.
  yield { type: 'members', data: Array.from(memberMap.values()) }

  // Messages, in batches.
  for (let i = 0; i < collected.length; i += chunkSize) {
    yield { type: 'messages', data: collected.slice(i, i + chunkSize) }
  }

  // Completion.
  const doneProgress = createProgress('done', totalBytes, totalBytes, messagesProcessed, '解析完成')
  yield { type: 'progress', data: doneProgress }
  onProgress?.(doneProgress)

  yield {
    type: 'done',
    data: { messageCount: messagesProcessed, memberCount: memberMap.size },
  }
}
|
||||
|
||||
// ==================== 导出解析器 ====================
|
||||
|
||||
/** Parser entry: pairs the format descriptor with the streaming parse function. */
export const parser_: Parser = {
  feature,
  parse: parseLegacy,
}

// ==================== Preprocessor export ====================

import { qqPreprocessor } from './shuakami-qq-preprocessor'

/** Re-export of the shared QQ preprocessor under this module's `preprocessor` name. */
export const preprocessor = qqPreprocessor

// ==================== Format module export ====================

/** Format module for this file: descriptor, parser and preprocessor together. */
const module_: FormatModule = {
  feature,
  parser: parser_,
  preprocessor: qqPreprocessor,
}

export default module_
|
||||
@@ -1,15 +1,19 @@
|
||||
/**
|
||||
* shuakami/qq-chat-exporter V4 格式解析器
|
||||
* 适配项目: https://github.com/shuakami/qq-chat-exporter
|
||||
* 版本: V4.x
|
||||
* 版本: V4.x (2024年12月更新)
|
||||
*
|
||||
* 特征:
|
||||
* - 时间戳使用 ISO 字符串格式(如 "2017-12-30T03:24:36.000Z")
|
||||
* - 时间戳使用 ISO 字符串格式(如 "2022-10-29T06:42:53.000Z")
|
||||
* - metadata.version 为 "4.x.x"
|
||||
* - sender 中有 uid 字段
|
||||
* - rawMessage 中包含 sendNickName(QQ昵称)、sendMemberName(群昵称)
|
||||
*
|
||||
* 注意:此解析器仅适配 shuakami/qq-chat-exporter 项目导出的格式,
|
||||
* 其他 QQ 聊天记录导出工具可能需要创建独立的解析器。
|
||||
* 名字字段说明:
|
||||
* - sendNickName: QQ原始昵称(始终存在)
|
||||
* - sendMemberName: 群昵称(可选,用户未设置时不存在)
|
||||
* - sendRemarkName: 导出者的备注名(不使用)
|
||||
*
|
||||
* 显示名优先级: sendMemberName > sendNickName
|
||||
*/
|
||||
|
||||
import * as fs from 'fs'
|
||||
@@ -36,7 +40,7 @@ export const feature: FormatFeature = {
|
||||
id: 'shuakami-qq-exporter-v4',
|
||||
name: 'shuakami/qq-chat-exporter V4',
|
||||
platform: ChatPlatform.QQ,
|
||||
priority: 10, // 高优先级
|
||||
priority: 10,
|
||||
extensions: ['.json'],
|
||||
signatures: {
|
||||
head: [/QQChatExporter V4/, /"version"\s*:\s*"4\./],
|
||||
@@ -46,6 +50,14 @@ export const feature: FormatFeature = {
|
||||
|
||||
// ==================== 消息结构 ====================
|
||||
|
||||
interface V4RawMessage {
|
||||
senderUin?: string
|
||||
senderUid?: string
|
||||
sendNickName?: string // QQ原始昵称
|
||||
sendMemberName?: string // 群昵称
|
||||
msgTime?: string // 秒级时间戳字符串
|
||||
}
|
||||
|
||||
interface V4Message {
|
||||
messageId?: string
|
||||
timestamp: string // ISO 格式
|
||||
@@ -63,7 +75,17 @@ interface V4Message {
|
||||
raw?: string
|
||||
resources?: Array<{ type: string }>
|
||||
elements?: Array<{ type: string }>
|
||||
emojis?: Array<{ type: string }>
|
||||
}
|
||||
rawMessage?: V4RawMessage
|
||||
}
|
||||
|
||||
// ==================== 成员信息追踪 ====================
|
||||
|
||||
interface MemberInfo {
|
||||
platformId: string
|
||||
displayName: string // 显示名(sendMemberName || sendNickName)
|
||||
nickname: string // QQ原始昵称(sendNickName)
|
||||
}
|
||||
|
||||
// ==================== 消息类型转换 ====================
|
||||
@@ -85,13 +107,9 @@ function convertMessageType(messageType: number | undefined, content: V4Message[
|
||||
}
|
||||
}
|
||||
|
||||
// 检查元素类型
|
||||
if (content.elements) {
|
||||
for (const elem of content.elements) {
|
||||
if (elem.type === 'market_face' || elem.type === 'face') {
|
||||
return MessageType.EMOJI
|
||||
}
|
||||
}
|
||||
// 检查 emojis 字段
|
||||
if (content.emojis && content.emojis.length > 0) {
|
||||
return MessageType.EMOJI
|
||||
}
|
||||
|
||||
// 根据 messageType 判断
|
||||
@@ -99,11 +117,13 @@ function convertMessageType(messageType: number | undefined, content: V4Message[
|
||||
case 1:
|
||||
return MessageType.TEXT
|
||||
case 2:
|
||||
return MessageType.IMAGE
|
||||
return MessageType.TEXT // 普通消息
|
||||
case 3:
|
||||
return MessageType.VOICE
|
||||
return MessageType.IMAGE
|
||||
case 7:
|
||||
return MessageType.VIDEO
|
||||
case 9:
|
||||
return MessageType.TEXT // 回复消息
|
||||
default:
|
||||
return MessageType.TEXT
|
||||
}
|
||||
@@ -146,7 +166,7 @@ async function* parseV4(options: ParseOptions): AsyncGenerator<ParseEvent, void,
|
||||
yield { type: 'meta', data: meta }
|
||||
|
||||
// 收集成员和消息
|
||||
const memberMap = new Map<string, ParsedMember>()
|
||||
const memberMap = new Map<string, MemberInfo>()
|
||||
let messageBatch: ParsedMessage[] = []
|
||||
|
||||
// 流式解析
|
||||
@@ -160,15 +180,31 @@ async function* parseV4(options: ParseOptions): AsyncGenerator<ParseEvent, void,
|
||||
const pipeline = chain([readStream, parser(), pick({ filter: /^messages\.\d+$/ }), streamValues()])
|
||||
|
||||
const processMessage = (msg: V4Message): ParsedMessage | null => {
|
||||
// 获取 platformId
|
||||
const platformId = msg.sender.uin || msg.sender.uid
|
||||
// 获取 platformId:优先使用 uin(QQ号),fallback 到 uid
|
||||
const platformId = msg.sender.uin || msg.sender.uid || msg.rawMessage?.senderUin || msg.rawMessage?.senderUid
|
||||
if (!platformId) return null
|
||||
|
||||
// 获取发送者名称
|
||||
const senderName = msg.sender.name || platformId
|
||||
// 从 rawMessage 获取名字信息
|
||||
const raw = msg.rawMessage
|
||||
const sendNickName = raw?.sendNickName || msg.sender.name || platformId
|
||||
const sendMemberName = raw?.sendMemberName
|
||||
|
||||
// 更新成员
|
||||
memberMap.set(platformId, { platformId, name: senderName })
|
||||
// 显示名优先级:群昵称 > QQ昵称
|
||||
const displayName = sendMemberName || sendNickName
|
||||
|
||||
// 更新成员信息(保留最新的名字)
|
||||
const existingMember = memberMap.get(platformId)
|
||||
if (!existingMember) {
|
||||
memberMap.set(platformId, {
|
||||
platformId,
|
||||
displayName,
|
||||
nickname: sendNickName,
|
||||
})
|
||||
} else {
|
||||
// 更新为最新的名字
|
||||
existingMember.displayName = displayName
|
||||
existingMember.nickname = sendNickName
|
||||
}
|
||||
|
||||
// 解析时间戳
|
||||
const timestamp = parseTimestamp(msg.timestamp)
|
||||
@@ -185,7 +221,7 @@ async function* parseV4(options: ParseOptions): AsyncGenerator<ParseEvent, void,
|
||||
|
||||
return {
|
||||
senderPlatformId: platformId,
|
||||
senderName,
|
||||
senderName: displayName, // 用于昵称历史追踪
|
||||
timestamp,
|
||||
type,
|
||||
content: textContent || null,
|
||||
@@ -229,8 +265,13 @@ async function* parseV4(options: ParseOptions): AsyncGenerator<ParseEvent, void,
|
||||
pipeline.on('error', reject)
|
||||
})
|
||||
|
||||
// 发送成员
|
||||
yield { type: 'members', data: Array.from(memberMap.values()) }
|
||||
// 发送成员(包含 nickname)
|
||||
const members: ParsedMember[] = Array.from(memberMap.values()).map((m) => ({
|
||||
platformId: m.platformId,
|
||||
name: m.displayName,
|
||||
nickname: m.nickname,
|
||||
}))
|
||||
yield { type: 'members', data: members }
|
||||
|
||||
// 分批发送消息
|
||||
for (let i = 0; i < messageBatch.length; i += batchSize) {
|
||||
|
||||
@@ -11,7 +11,7 @@ import * as path from 'path'
|
||||
import * as os from 'os'
|
||||
import { parser } from 'stream-json'
|
||||
import { pick } from 'stream-json/filters/Pick'
|
||||
import { streamArray } from 'stream-json/streamers/StreamArray'
|
||||
import { streamValues } from 'stream-json/streamers/StreamValues'
|
||||
import { chain } from 'stream-chain'
|
||||
import type { ParseProgress, Preprocessor } from '../types'
|
||||
import { getFileSize, createProgress } from '../utils'
|
||||
@@ -53,11 +53,19 @@ interface SlimQQMessage {
|
||||
text: string
|
||||
elements?: Array<{ type: string }>
|
||||
resources?: Array<{ type: string }>
|
||||
emojis?: Array<{ type: string }>
|
||||
}
|
||||
recalled?: boolean
|
||||
isRecalled?: boolean
|
||||
system?: boolean
|
||||
isSystemMessage?: boolean
|
||||
// V4 新增:保留 rawMessage 中的名字字段
|
||||
rawMessage?: {
|
||||
sendNickName?: string
|
||||
sendMemberName?: string
|
||||
senderUin?: string
|
||||
senderUid?: string
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -66,6 +74,7 @@ interface SlimQQMessage {
|
||||
function slimMessage(msg: Record<string, unknown>): SlimQQMessage {
|
||||
const sender = msg.sender as { uin?: string; uid?: string; name?: string } | undefined
|
||||
const content = msg.content as Record<string, unknown> | undefined
|
||||
const rawMessage = msg.rawMessage as Record<string, unknown> | undefined
|
||||
|
||||
const slimContent: SlimQQMessage['content'] = {
|
||||
text: (content?.text as string) || '',
|
||||
@@ -83,6 +92,12 @@ function slimMessage(msg: Record<string, unknown>): SlimQQMessage {
|
||||
}))
|
||||
}
|
||||
|
||||
if (content?.emojis && Array.isArray(content.emojis)) {
|
||||
slimContent.emojis = (content.emojis as Array<{ type: string }>).map((e) => ({
|
||||
type: e.type,
|
||||
}))
|
||||
}
|
||||
|
||||
const slimMsg: SlimQQMessage = {
|
||||
timestamp: msg.timestamp as number | string,
|
||||
sender: { name: sender?.name || '' },
|
||||
@@ -105,6 +120,15 @@ function slimMessage(msg: Record<string, unknown>): SlimQQMessage {
|
||||
if (sender?.uin) slimMsg.sender.uin = sender.uin
|
||||
if (sender?.uid) slimMsg.sender.uid = sender.uid
|
||||
|
||||
// V4 新增:保留 rawMessage 中的关键名字字段
|
||||
if (rawMessage) {
|
||||
slimMsg.rawMessage = {}
|
||||
if (rawMessage.sendNickName) slimMsg.rawMessage.sendNickName = rawMessage.sendNickName as string
|
||||
if (rawMessage.sendMemberName) slimMsg.rawMessage.sendMemberName = rawMessage.sendMemberName as string
|
||||
if (rawMessage.senderUin) slimMsg.rawMessage.senderUin = rawMessage.senderUin as string
|
||||
if (rawMessage.senderUid) slimMsg.rawMessage.senderUid = rawMessage.senderUid as string
|
||||
}
|
||||
|
||||
return slimMsg
|
||||
}
|
||||
|
||||
@@ -173,11 +197,12 @@ async function preprocessQQJson(inputPath: string, onProgress?: (progress: Parse
|
||||
|
||||
const header = { metadata, chatInfo, messages: [] }
|
||||
const headerJson = JSON.stringify(header)
|
||||
writeStream.write(headerJson.slice(0, -3) + '\n')
|
||||
// 移除最后的 ]} 保留 [,结果如 {"metadata":...,"chatInfo":...,"messages":[
|
||||
writeStream.write(headerJson.slice(0, -2) + '\n')
|
||||
|
||||
let isFirstMessage = true
|
||||
|
||||
const pipeline = chain([readStream, parser(), pick({ filter: /^messages\.\d+$/ }), streamArray()])
|
||||
const pipeline = chain([readStream, parser(), pick({ filter: /^messages\.\d+$/ }), streamValues()])
|
||||
|
||||
pipeline.on('data', ({ value }: { value: Record<string, unknown> }) => {
|
||||
const slimMsg = slimMessage(value)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
import { FormatSniffer, createSniffer } from './sniffer'
|
||||
import { formats } from './formats'
|
||||
import { getFileSize } from './utils'
|
||||
import type {
|
||||
ParseOptions,
|
||||
ParseEvent,
|
||||
|
||||
@@ -80,7 +80,8 @@ function createDatabase(sessionId: string): Database.Database {
|
||||
CREATE TABLE IF NOT EXISTS member (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
platform_id TEXT NOT NULL UNIQUE,
|
||||
name TEXT NOT NULL
|
||||
name TEXT NOT NULL,
|
||||
nickname TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS member_name_history (
|
||||
@@ -165,7 +166,7 @@ export async function streamImport(filePath: string, requestId: string): Promise
|
||||
INSERT INTO meta (name, platform, type, imported_at) VALUES (?, ?, ?, ?)
|
||||
`)
|
||||
const insertMember = db.prepare(`
|
||||
INSERT OR IGNORE INTO member (platform_id, name) VALUES (?, ?)
|
||||
INSERT OR IGNORE INTO member (platform_id, name, nickname) VALUES (?, ?, ?)
|
||||
`)
|
||||
const getMemberId = db.prepare(`SELECT id FROM member WHERE platform_id = ?`)
|
||||
const insertMessage = db.prepare(`
|
||||
@@ -209,7 +210,7 @@ export async function streamImport(filePath: string, requestId: string): Promise
|
||||
onMembers: (members: ParsedMember[]) => {
|
||||
console.log(`[StreamImport] 收到 ${members.length} 个成员`)
|
||||
for (const member of members) {
|
||||
insertMember.run(member.platformId, member.name)
|
||||
insertMember.run(member.platformId, member.name, member.nickname || null)
|
||||
const row = getMemberId.get(member.platformId) as { id: number } | undefined
|
||||
if (row) {
|
||||
memberIdMap.set(member.platformId, row.id)
|
||||
@@ -236,7 +237,7 @@ export async function streamImport(filePath: string, requestId: string): Promise
|
||||
// 确保成员存在
|
||||
if (!memberIdMap.has(msg.senderPlatformId)) {
|
||||
const memberName = msg.senderName || msg.senderPlatformId
|
||||
insertMember.run(msg.senderPlatformId, memberName)
|
||||
insertMember.run(msg.senderPlatformId, memberName, null)
|
||||
const row = getMemberId.get(msg.senderPlatformId) as { id: number } | undefined
|
||||
if (row) {
|
||||
memberIdMap.set(msg.senderPlatformId, row.id)
|
||||
|
||||
+4
-2
@@ -79,7 +79,8 @@ export interface DbMeta {
|
||||
export interface DbMember {
|
||||
id: number // 自增ID
|
||||
platform_id: string // 平台标识(QQ号等)
|
||||
name: string // 最新昵称
|
||||
name: string // 最新显示昵称(群昵称优先)
|
||||
nickname: string | null // QQ原始昵称
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -100,7 +101,8 @@ export interface DbMessage {
|
||||
*/
|
||||
export interface ParsedMember {
|
||||
platformId: string // 平台标识
|
||||
name: string // 昵称
|
||||
name: string // 显示昵称(群昵称优先,否则QQ昵称)
|
||||
nickname?: string // QQ原始昵称(sendNickName)
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user