feat(ai): 实现聊天记录预处理管道

- 新增 preprocessor 模块:数据清洗(XML卡片)、黑名单过滤、智能去噪、连续发言合并、数据脱敏
- 内置多国脱敏规则(中国手机号/身份证、美国SSN、日韩号码等)+ 自定义规则支持
- 工具层统一使用 wrapWithPreprocessing 包装,自动对 rawMessages 执行预处理和格式化
- 昵称匿名化:用 U{id} 替代真实昵称,跨工具调用一致
- SQL 查询补充 senderId/senderPlatformId 字段
- PreprocessConfig 类型定义(preload + 主进程)
This commit is contained in:
digua
2026-02-27 23:56:35 +08:00
committed by digua
parent 1823042fad
commit c36878c58d
15 changed files with 785 additions and 14 deletions

View File

@@ -4,7 +4,7 @@ import type { ToolContext } from '../types'
import { timeParamProperties } from '../utils/schemas'
import * as workerManager from '../../../worker/workerManager'
import { parseExtendedTimeParams } from '../utils/time-params'
-import { formatTimeRange, formatMessageCompact, t } from '../utils/format'
+import { formatTimeRange, t } from '../utils/format'
const schema = Type.Object({
member_id_1: Type.Number({ description: 'ai.tools.get_conversation_between.params.member_id_1' }),
@@ -51,7 +51,7 @@ export function createTool(context: ToolContext): AgentTool<typeof schema> {
member1: result.member1Name,
member2: result.member2Name,
timeRange: formatTimeRange(effectiveTimeFilter, locale),
-conversation: result.messages.map((m) => formatMessageCompact(m, locale)),
+rawMessages: result.messages,
}
return {