feat(ai): 实现聊天记录预处理管道

- 新增 preprocessor 模块:数据清洗(XML卡片)、黑名单过滤、智能去噪、连续发言合并、数据脱敏
- 内置多国脱敏规则(中国手机号/身份证、美国SSN、日韩号码等)+ 自定义规则支持
- 工具层统一 wrapWithPreprocessing 包装,自动对 rawMessages 执行预处理+格式化
- 昵称匿名化:用 U{id} 替代真实昵称,跨工具调用一致
- SQL 查询补充 senderId/senderPlatformId
- PreprocessConfig 类型定义(preload + 主进程)
This commit is contained in:
digua
2026-02-27 23:56:35 +08:00
committed by digua
parent 1823042fad
commit c36878c58d
15 changed files with 785 additions and 14 deletions
@@ -0,0 +1,198 @@
/**
* 内置脱敏规则库
* 按 locale 分组,支持通用规则和地区特定规则
*/
import type { DesensitizeRule } from './types'
export type { DesensitizeRule }
export const BUILTIN_DESENSITIZE_RULES: DesensitizeRule[] = [
// ==================== 中国 (zh-CN) ====================
{
id: 'cn_phone',
label: 'desensitize.rules.cn_phone',
pattern: '(?<!\\d)1[3-9]\\d{9}(?!\\d)',
replacement: '[手机号]',
enabled: true,
builtin: true,
locales: ['zh-CN'],
},
{
id: 'cn_id_card',
label: 'desensitize.rules.cn_id_card',
pattern: '(?<!\\d)\\d{17}[\\dXx](?!\\d)',
replacement: '[身份证]',
enabled: true,
builtin: true,
locales: ['zh-CN'],
},
{
id: 'cn_bank_card',
label: 'desensitize.rules.cn_bank_card',
pattern: '(?<!\\d)\\d{16,19}(?!\\d)',
replacement: '[银行卡]',
enabled: false,
builtin: true,
locales: ['zh-CN'],
},
{
id: 'cn_landline',
label: 'desensitize.rules.cn_landline',
pattern: '(?<!\\d)0\\d{2,3}-?\\d{7,8}(?!\\d)',
replacement: '[座机号]',
enabled: false,
builtin: true,
locales: ['zh-CN'],
},
// ==================== 美国 (en-US) ====================
{
id: 'us_ssn',
label: 'desensitize.rules.us_ssn',
pattern: '(?<!\\d)\\d{3}-\\d{2}-\\d{4}(?!\\d)',
replacement: '[SSN]',
enabled: true,
builtin: true,
locales: ['en-US'],
},
{
id: 'us_phone',
label: 'desensitize.rules.us_phone',
pattern: '(?<!\\d)(?:\\+?1[-\\s.]?)?\\(?\\d{3}\\)?[-\\s.]?\\d{3}[-\\s.]?\\d{4}(?!\\d)',
replacement: '[Phone]',
enabled: true,
builtin: true,
locales: ['en-US'],
},
{
id: 'us_drivers_license',
label: 'desensitize.rules.us_drivers_license',
pattern: '(?<![A-Z])\\b[A-Z]\\d{7,8}\\b',
replacement: "[Driver's License]",
enabled: false,
builtin: true,
locales: ['en-US'],
},
// ==================== 日本 (ja-JP) ====================
{
id: 'jp_phone',
label: 'desensitize.rules.jp_phone',
pattern: '(?<!\\d)0[789]0-?\\d{4}-?\\d{4}(?!\\d)',
replacement: '[電話番号]',
enabled: true,
builtin: true,
locales: ['ja-JP'],
},
{
id: 'jp_my_number',
label: 'desensitize.rules.jp_my_number',
pattern: '(?<!\\d)\\d{4}\\s?\\d{4}\\s?\\d{4}(?!\\d)',
replacement: '[マイナンバー]',
enabled: false,
builtin: true,
locales: ['ja-JP'],
},
// ==================== 韩国 (ko-KR) ====================
{
id: 'kr_phone',
label: 'desensitize.rules.kr_phone',
pattern: '(?<!\\d)01[016789]-?\\d{3,4}-?\\d{4}(?!\\d)',
replacement: '[전화번호]',
enabled: true,
builtin: true,
locales: ['ko-KR'],
},
{
id: 'kr_rrn',
label: 'desensitize.rules.kr_rrn',
pattern: '(?<!\\d)\\d{6}-[1-4]\\d{6}(?!\\d)',
replacement: '[주민번호]',
enabled: true,
builtin: true,
locales: ['ko-KR'],
},
// ==================== 凭据 / Token (所有语言) ====================
{
id: 'api_key_prefix',
label: 'desensitize.rules.api_key_prefix',
pattern: '\\b(?:sk-|pk_(?:live|test)_|ghp_|gho_|ghs_|ghu_|glpat-|xoxb-|xoxp-|AKIA)[A-Za-z0-9_\\-]{10,}\\b',
replacement: '[API Key]',
enabled: true,
builtin: true,
locales: [],
},
{
id: 'bearer_token',
label: 'desensitize.rules.bearer_token',
pattern: 'Bearer\\s+[A-Za-z0-9\\-._~+/]+=*',
replacement: 'Bearer [Token]',
enabled: true,
builtin: true,
locales: [],
},
// ==================== 通用 (所有语言) ====================
{
id: 'email',
label: 'desensitize.rules.email',
pattern: '[a-zA-Z0-9._%+\\-]+@[a-zA-Z0-9.\\-]+\\.[a-zA-Z]{2,}',
replacement: '[Email]',
enabled: true,
builtin: true,
locales: [],
},
{
id: 'credit_card',
label: 'desensitize.rules.credit_card',
pattern:
'(?<!\\d)(?:4\\d{3}|5[1-5]\\d{2}|3[47]\\d{2}|6(?:011|5\\d{2}))[-\\s.]?\\d{4}[-\\s.]?\\d{4}[-\\s.]?\\d{4}(?!\\d)',
replacement: '[Credit Card]',
enabled: false,
builtin: true,
locales: [],
},
{
id: 'ipv4',
label: 'desensitize.rules.ipv4',
pattern:
'(?<!\\d)(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)(?!\\d)',
replacement: '[IP]',
enabled: false,
builtin: true,
locales: [],
},
{
id: 'url',
label: 'desensitize.rules.url',
pattern: 'https?://[^\\s<>"]+',
replacement: '[URL]',
enabled: false,
builtin: true,
locales: [],
},
]
/**
* 获取指定 locale 的默认规则(当前 locale 特定 + 通用规则)
*/
export function getDefaultRulesForLocale(locale: string): DesensitizeRule[] {
return BUILTIN_DESENSITIZE_RULES.filter((rule) => rule.locales.length === 0 || rule.locales.includes(locale)).map(
(rule) => ({ ...rule })
)
}
/**
* 合并新 locale 的规则到现有规则列表
* 不重复添加已有 id,不修改已有规则的 enabled 状态
*/
export function mergeRulesForLocale(existing: DesensitizeRule[], locale: string): DesensitizeRule[] {
const existingIds = new Set(existing.map((r) => r.id))
const newRules = BUILTIN_DESENSITIZE_RULES.filter(
(rule) => !existingIds.has(rule.id) && (rule.locales.length === 0 || rule.locales.includes(locale))
).map((rule) => ({ ...rule }))
return [...existing, ...newRules]
}
+3
View File
@@ -0,0 +1,3 @@
export type { PreprocessConfig, PreprocessableMessage, DesensitizeRule } from './types'
export { preprocessMessages } from './pipeline'
export { BUILTIN_DESENSITIZE_RULES, getDefaultRulesForLocale, mergeRulesForLocale } from './builtin-rules'
+272
View File
@@ -0,0 +1,272 @@
/**
* 预处理管道
* 执行顺序:数据清洗 → 黑名单 → 去噪 → 合并 → 脱敏
*/
import type { PreprocessConfig, PreprocessableMessage, DesensitizeRule } from './types'
import { aiLogger } from '../logger'
const MERGE_WINDOW_DEFAULT = 180
/**
* 对消息数组执行预处理管道
*/
export function preprocessMessages<T extends PreprocessableMessage>(messages: T[], config?: PreprocessConfig): T[] {
if (!config || !hasAnyEnabled(config)) return messages
if (messages.length === 0) return messages
const inputCount = messages.length
let result: T[] = [...messages]
const applied: string[] = []
if (config.dataCleaning !== false) {
const cleaned = applyDataCleaning(result)
if (cleaned.changed > 0) {
result = cleaned.messages
applied.push(`dataCleaning: ${cleaned.changed} messages cleaned`)
}
}
if (config.blacklistKeywords.length > 0) {
const before = result.length
result = applyBlacklistFilter(result, config.blacklistKeywords)
applied.push(`blacklist: ${before}${result.length} (-${before - result.length})`)
}
if (config.denoise) {
const before = result.length
result = applyDenoise(result)
applied.push(`denoise: ${before}${result.length} (-${before - result.length})`)
}
if (config.mergeConsecutive) {
const before = result.length
result = applyMergeConsecutive(result, config.mergeWindowSeconds ?? MERGE_WINDOW_DEFAULT)
applied.push(`merge: ${before}${result.length} (-${before - result.length})`)
}
if (config.desensitize) {
const enabledRules = (config.desensitizeRules || []).filter((r) => r.enabled)
if (enabledRules.length > 0) {
result = applyDesensitize(result, enabledRules)
applied.push(`desensitize: ${enabledRules.length} rules applied`)
}
}
aiLogger.info('Preprocess', `Pipeline: ${inputCount}${result.length} messages`, {
strategies: applied,
})
return result
}
function hasAnyEnabled(config: PreprocessConfig): boolean {
return (
config.dataCleaning !== false ||
config.mergeConsecutive ||
config.blacklistKeywords.length > 0 ||
config.denoise ||
config.desensitize
)
}
// ==================== 策略实现 ====================
/**
* 数据清洗:将 XML 卡片消息(美团分享、音乐、公众号等)提取为简洁文本
* 微信 XML 消息以 <?xml 或 <msg> 或 <msg>< 开头
*/
function applyDataCleaning<T extends PreprocessableMessage>(messages: T[]): { messages: T[]; changed: number } {
let changed = 0
const result = messages.map((msg) => {
if (!msg.content) return msg
const cleaned = cleanXmlContent(msg.content)
if (cleaned === msg.content) return msg
changed++
return { ...msg, content: cleaned }
})
return { messages: result, changed }
}
const XML_START = /^<\?xml\s|^<msg[\s>]/
function cleanXmlContent(content: string): string {
const trimmed = content.trim()
if (!XML_START.test(trimmed)) return content
const title = extractXmlTag(trimmed, 'title')
const des = extractXmlTag(trimmed, 'des')
if (title) {
return des ? `[分享] ${title} - ${des}` : `[分享] ${title}`
}
return '[应用消息]'
}
function extractXmlTag(xml: string, tag: string): string | null {
const openTag = `<${tag}>`
const closeTag = `</${tag}>`
const start = xml.indexOf(openTag)
if (start === -1) return null
const contentStart = start + openTag.length
const end = xml.indexOf(closeTag, contentStart)
if (end === -1) return null
const value = xml.slice(contentStart, end).trim()
return value.length > 0 ? value : null
}
/**
* 黑名单过滤:消息内容包含任一关键词则整条移除
*/
function applyBlacklistFilter<T extends PreprocessableMessage>(messages: T[], keywords: string[]): T[] {
if (keywords.length === 0) return messages
const lowerKeywords = keywords.map((k) => k.toLowerCase())
return messages.filter((msg) => {
if (!msg.content) return true
const lower = msg.content.toLowerCase()
return !lowerKeywords.some((kw) => lower.includes(kw))
})
}
/**
* 智能去噪:过滤无意义消息
* - 内容长度 < 2 的纯文本(如 "嗯"、"哦"
* - 纯表情消息
* - 系统占位符([图片]、[视频]、[语音] 等)
* 回复保护:有 replyToMessageId 的消息不过滤
*/
function applyDenoise<T extends PreprocessableMessage>(messages: T[]): T[] {
return messages.filter((msg) => {
if (!msg.content) return false
if (msg.replyToMessageId) return true
const content = msg.content.trim()
if (content.length === 0) return false
if (content.length < 2) return false
if (SYSTEM_PLACEHOLDERS.has(content)) return false
if (isPureEmoji(content)) return false
return true
})
}
const SYSTEM_PLACEHOLDERS = new Set([
'[图片]',
'[视频]',
'[语音]',
'[文件]',
'[动画表情]',
'[表情]',
'[链接]',
'[位置]',
'[名片]',
'[红包]',
'[转账]',
'[音乐]',
'[Image]',
'[Video]',
'[Voice]',
'[File]',
'[Sticker]',
'[Link]',
])
function isPureEmoji(str: string): boolean {
const stripped = str
.replace(/\p{Emoji_Presentation}/gu, '')
.replace(/\p{Extended_Pictographic}/gu, '')
.replace(/\u200d/g, '')
.replace(/\ufe0f/g, '')
.replace(/\u20e3/g, '')
.replace(/\s/g, '')
return stripped.length === 0
}
/**
* 合并连续发言:同一发送者在时间窗口内的连续消息合并为一条
* 使用 senderPlatformId(如可用),否则 fallback 到 senderName
*/
function applyMergeConsecutive<T extends PreprocessableMessage>(messages: T[], windowSeconds: number): T[] {
if (messages.length <= 1) return messages
const merged: T[] = []
let current: T | null = null
for (const msg of messages) {
if (!current) {
current = { ...msg }
continue
}
const sameSender = isSameSender(current, msg)
const withinWindow = Math.abs(msg.timestamp - current.timestamp) <= windowSeconds
if (sameSender && withinWindow) {
current = {
...current,
content: [current.content, msg.content].filter(Boolean).join('\n'),
}
} else {
merged.push(current)
current = { ...msg }
}
}
if (current) merged.push(current)
return merged
}
function isSameSender(a: PreprocessableMessage, b: PreprocessableMessage): boolean {
if (a.senderPlatformId && b.senderPlatformId) {
return a.senderPlatformId === b.senderPlatformId
}
return a.senderName === b.senderName
}
/**
* 数据脱敏:按规则列表顺序依次替换敏感信息
* 规则按列表优先级排序,先匹配的先替换
* 自定义正则有超时保护(单条规则 50ms 上限)
*/
function applyDesensitize<T extends PreprocessableMessage>(messages: T[], rules: DesensitizeRule[]): T[] {
const compiledRules = compileRules(rules)
if (compiledRules.length === 0) return messages
return messages.map((msg) => {
if (!msg.content) return msg
let content = msg.content
for (const { regex, replacement } of compiledRules) {
regex.lastIndex = 0
content = content.replace(regex, replacement)
}
if (content === msg.content) return msg
return { ...msg, content }
})
}
const regexCache = new Map<string, RegExp | null>()
function compileRules(rules: DesensitizeRule[]): Array<{ regex: RegExp; replacement: string }> {
const result: Array<{ regex: RegExp; replacement: string }> = []
for (const rule of rules) {
let regex = regexCache.get(rule.pattern)
if (regex === undefined) {
try {
regex = new RegExp(rule.pattern, 'g')
regexCache.set(rule.pattern, regex)
} catch {
aiLogger.warn('Preprocess', `Invalid regex in desensitize rule "${rule.id}": ${rule.pattern}`)
regexCache.set(rule.pattern, null)
continue
}
}
if (regex) {
result.push({ regex, replacement: rule.replacement })
}
}
return result
}
+48
View File
@@ -0,0 +1,48 @@
/** 单条脱敏规则 */
export interface DesensitizeRule {
/** 唯一标识(预置规则用固定 id,自定义规则用 uuid) */
id: string
/** 显示名称 */
label: string
/** 正则表达式字符串(运行时 new RegExp(pattern, 'g') */
pattern: string
/** 替换文本 */
replacement: string
/** 是否启用 */
enabled: boolean
/** 是否为预置规则(预置规则不可删除,仅可启用/禁用) */
builtin: boolean
/** 适用的 locale 列表(空数组表示通用) */
locales: string[]
}
/** 预处理配置 */
export interface PreprocessConfig {
/** 数据清洗:清理 XML 卡片消息等非纯文本内容(默认开启) */
dataCleaning: boolean
/** 合并连续发言(同发送者 + 时间间隔 < mergeWindowSeconds */
mergeConsecutive: boolean
/** 合并窗口(秒),默认 180 */
mergeWindowSeconds?: number
/** 自定义黑名单关键词,包含任一关键词的消息将被整条过滤 */
blacklistKeywords: string[]
/** 智能去噪(过滤纯语气词、纯表情、系统占位符) */
denoise: boolean
/** 数据脱敏总开关 */
desensitize: boolean
/** 脱敏规则列表(预置 + 自定义,按优先级排序) */
desensitizeRules: DesensitizeRule[]
/** 昵称匿名化:用 U{id} 替代真实昵称,减少 AI 幻觉 */
anonymizeNames: boolean
}
/** 预处理管道可接受的消息结构(兼容 SearchMessageResult / SessionMessagesResult.messages */
export interface PreprocessableMessage {
id?: number
senderId?: number
senderName: string
senderPlatformId?: string
content: string | null
timestamp: number
replyToMessageId?: string | null
}
@@ -4,7 +4,7 @@ import type { ToolContext } from '../types'
import { timeParamProperties } from '../utils/schemas'
import * as workerManager from '../../../worker/workerManager'
import { parseExtendedTimeParams } from '../utils/time-params'
import { formatTimeRange, formatMessageCompact, t } from '../utils/format'
import { formatTimeRange, t } from '../utils/format'
const schema = Type.Object({
member_id_1: Type.Number({ description: 'ai.tools.get_conversation_between.params.member_id_1' }),
@@ -51,7 +51,7 @@ export function createTool(context: ToolContext): AgentTool<typeof schema> {
member1: result.member1Name,
member2: result.member2Name,
timeRange: formatTimeRange(effectiveTimeFilter, locale),
conversation: result.messages.map((m) => formatMessageCompact(m, locale)),
rawMessages: result.messages,
}
return {
@@ -2,7 +2,7 @@ import { Type } from '@mariozechner/pi-ai'
import type { AgentTool } from '@mariozechner/pi-agent-core'
import type { ToolContext } from '../types'
import * as workerManager from '../../../worker/workerManager'
import { formatMessageCompact, t } from '../utils/format'
import { t } from '../utils/format'
const schema = Type.Object({
message_ids: Type.Array(Type.Number(), { description: 'ai.tools.get_message_context.params.message_ids' }),
@@ -37,7 +37,7 @@ export function createTool(context: ToolContext): AgentTool<typeof schema> {
totalMessages: messages.length,
contextSize: contextSize,
requestedMessageIds: params.message_ids,
messages: messages.map((m) => formatMessageCompact(m, locale)),
rawMessages: messages,
}
return {
@@ -3,7 +3,7 @@ import type { AgentTool } from '@mariozechner/pi-agent-core'
import type { ToolContext } from '../types'
import * as workerManager from '../../../worker/workerManager'
import { parseExtendedTimeParams } from '../utils/time-params'
import { formatTimeRange, formatMessageCompact } from '../utils/format'
import { formatTimeRange } from '../utils/format'
import { timeParamProperties } from '../utils/schemas'
const schema = Type.Object({
@@ -29,7 +29,7 @@ export function createTool(context: ToolContext): AgentTool<typeof schema> {
total: result.total,
returned: result.messages.length,
timeRange: formatTimeRange(effectiveTimeFilter, locale),
messages: result.messages.map((m) => formatMessageCompact(m, locale)),
rawMessages: result.messages,
}
return {
@@ -2,7 +2,7 @@ import { Type } from '@mariozechner/pi-ai'
import type { AgentTool } from '@mariozechner/pi-agent-core'
import type { ToolContext } from '../types'
import * as workerManager from '../../../worker/workerManager'
import { isChineseLocale, formatMessageCompact } from '../utils/format'
import { isChineseLocale } from '../utils/format'
const schema = Type.Object({
session_id: Type.Number({ description: 'ai.tools.get_session_messages.params.session_id' }),
@@ -38,7 +38,7 @@ export function createTool(context: ToolContext): AgentTool<typeof schema> {
messageCount: result.messageCount,
returnedCount: result.returnedCount,
participants: result.participants,
messages: result.messages.map((m) => formatMessageCompact(m, locale)),
rawMessages: result.messages,
}
}
@@ -3,7 +3,7 @@ import type { AgentTool } from '@mariozechner/pi-agent-core'
import type { ToolContext } from '../types'
import * as workerManager from '../../../worker/workerManager'
import { parseExtendedTimeParams } from '../utils/time-params'
import { formatTimeRange, formatMessageCompact } from '../utils/format'
import { formatTimeRange } from '../utils/format'
import { timeParamProperties } from '../utils/schemas'
const schema = Type.Object({
@@ -38,7 +38,7 @@ export function createTool(context: ToolContext): AgentTool<typeof schema> {
total: result.total,
returned: result.messages.length,
timeRange: formatTimeRange(effectiveTimeFilter, locale),
messages: result.messages.map((m) => formatMessageCompact(m, locale)),
rawMessages: result.messages,
}
return {
+133 -2
View File
@@ -1,6 +1,8 @@
/**
* AI Tools 模块入口
* 工具创建与管理
* 工具创建、预处理管道与管理
*
* 架构:工具返回结构化数据(rawMessages) → 处理层执行预处理 + 格式化 → 生成 LLM 内容
*/
import type { AgentTool } from '@mariozechner/pi-agent-core'
@@ -21,6 +23,8 @@ import {
} from './definitions'
import { isEmbeddingEnabled } from '../rag'
import { t as i18nT } from '../../i18n'
import { preprocessMessages, type PreprocessableMessage } from '../preprocessor'
import { formatMessageCompact } from './utils/format'
// 导出类型
export * from './types'
@@ -41,6 +45,57 @@ const coreFactories: ToolFactory[] = [
createGetSessionSummaries,
]
/**
* 将工具返回的结构化数据格式化为 LLM 友好的纯文本
*
* 从 JSON.stringify 改为纯文本,节省 token 且更易于 LLM 理解。
* 元数据作为头部,消息逐行排列。
*/
function formatToolResultAsText(details: Record<string, unknown>): string {
const lines: string[] = []
const messages = details.messages as string[] | undefined
for (const [key, value] of Object.entries(details)) {
if (key === 'messages') continue
if (value === undefined || value === null) continue
if (typeof value === 'object') {
if ('start' in (value as Record<string, unknown>) && 'end' in (value as Record<string, unknown>)) {
const range = value as { start: string; end: string }
lines.push(`${key}: ${range.start} ~ ${range.end}`)
} else if (Array.isArray(value)) {
lines.push(`${key}: ${value.join(', ')}`)
} else {
lines.push(`${key}: ${JSON.stringify(value)}`)
}
} else {
lines.push(`${key}: ${value}`)
}
}
if (messages && messages.length > 0) {
lines.push('')
let lastDate = ''
for (const msg of messages) {
const spaceIdx = msg.indexOf(' ')
const secondSpaceIdx = msg.indexOf(' ', spaceIdx + 1)
if (spaceIdx > 0 && secondSpaceIdx > 0) {
const date = msg.slice(0, spaceIdx)
const rest = msg.slice(spaceIdx + 1)
if (date !== lastDate) {
lines.push(`--- ${date} ---`)
lastDate = date
}
lines.push(rest)
} else {
lines.push(msg)
}
}
}
return lines.join('\n')
}
/**
* 翻译 AgentTool 的描述(工具级 + 参数级)
*
@@ -71,11 +126,87 @@ function translateTool(tool: AgentTool<any>): AgentTool<any> {
}
}
/**
* 预处理包装层
* 拦截工具的 execute 结果:如果 details 中包含 rawMessages
* 则执行预处理管道 + 格式化,替换为最终的 LLM 内容
*
* 工具约定:返回消息的工具在 details 中放置 rawMessages 字段(结构化消息数组),
* 处理层负责 preprocess + formatMessageCompact,工具无需感知预处理逻辑。
*/
function wrapWithPreprocessing(tool: AgentTool<any>, context: ToolContext): AgentTool<any> {
const originalExecute = tool.execute
return {
...tool,
execute: async (toolCallId: string, params: any) => {
const result = await originalExecute(toolCallId, params)
const details = result.details as Record<string, unknown> | undefined
if (!details?.rawMessages || !Array.isArray(details.rawMessages)) {
return result
}
const raw = details.rawMessages as PreprocessableMessage[]
const processed = preprocessMessages(raw, context.preprocessConfig)
let nameMapLine = ''
if (context.preprocessConfig?.anonymizeNames) {
nameMapLine = anonymizeMessageNames(processed, context.ownerInfo?.platformId)
}
const formatted = processed.map((m) => formatMessageCompact(m, context.locale))
const finalDetails = { ...details, messages: formatted, returned: processed.length }
delete finalDetails.rawMessages
let textContent = formatToolResultAsText(finalDetails)
if (nameMapLine) {
textContent = nameMapLine + '\n' + textContent
}
return {
content: [{ type: 'text' as const, text: textContent }],
details: finalDetails,
}
},
}
}
/**
* 昵称匿名化:用 U{senderId} 替代真实昵称
* 就地修改 messages 的 senderName,返回映射表文本行
*/
function anonymizeMessageNames(messages: PreprocessableMessage[], ownerPlatformId?: string): string {
const nameMap = new Map<number, { name: string; platformId?: string }>()
for (const msg of messages) {
if (msg.senderId != null && !nameMap.has(msg.senderId)) {
nameMap.set(msg.senderId, { name: msg.senderName, platformId: msg.senderPlatformId })
}
}
if (nameMap.size === 0) return ''
for (const msg of messages) {
if (msg.senderId != null) {
msg.senderName = `U${msg.senderId}`
}
}
const entries: string[] = []
for (const [id, { name, platformId }] of nameMap) {
const isOwner = ownerPlatformId && platformId === ownerPlatformId
entries.push(`U${id}=${name}${isOwner ? '(owner)' : ''}`)
}
return `[Name Map] ${entries.join(' | ')}`
}
/**
* 获取所有可用的 AgentTool
*
* 根据配置动态过滤工具(如:语义搜索工具仅在启用 Embedding 时可用)
* 根据当前 locale 动态翻译工具描述
* 统一包装预处理层
*/
export function getAllTools(context: ToolContext): AgentTool<any>[] {
const tools: AgentTool<any>[] = coreFactories.map((f) => f(context))
@@ -84,5 +215,5 @@ export function getAllTools(context: ToolContext): AgentTool<any>[] {
tools.push(createSemanticSearchMessages(context))
}
return tools.map(translateTool)
return tools.map(translateTool).map((t) => wrapWithPreprocessing(t, context))
}
+4
View File
@@ -2,6 +2,8 @@
* AI Tools 类型定义
*/
import type { PreprocessConfig } from '../preprocessor'
/** Owner 信息(当前用户在对话中的身份) */
export interface OwnerInfo {
/** Owner 的 platformId */
@@ -30,4 +32,6 @@ export interface ToolContext {
ownerInfo?: OwnerInfo
/** 语言环境(用于工具返回结果的国际化) */
locale?: string
/** 聊天记录预处理配置(全局) */
preprocessConfig?: PreprocessConfig
}
+10
View File
@@ -14,6 +14,7 @@ import { ensureAvatarColumn } from './basic'
*/
export interface MessageResult {
id: number
senderId: number
senderName: string
senderPlatformId: string
senderAliases: string[]
@@ -49,6 +50,7 @@ export interface MessagesWithTotal {
*/
interface DbMessageRow {
id: number
senderId: number
senderName: string
senderPlatformId: string
aliases: string | null
@@ -78,6 +80,7 @@ function sanitizeMessageRow(row: DbMessageRow): MessageResult {
return {
id: Number(row.id),
senderId: Number(row.senderId),
senderName: String(row.senderName || ''),
senderPlatformId: String(row.senderPlatformId || ''),
senderAliases: aliases,
@@ -155,6 +158,7 @@ export function getRecentMessages(sessionId: string, filter?: TimeFilter, limit:
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -218,6 +222,7 @@ export function getAllRecentMessages(sessionId: string, filter?: TimeFilter, lim
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -301,6 +306,7 @@ export function searchMessages(
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -391,6 +397,7 @@ export function getMessageContext(
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -450,6 +457,7 @@ export function getMessagesBefore(
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -522,6 +530,7 @@ export function getMessagesAfter(
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -622,6 +631,7 @@ export function getConversationBetween(
const sql = `
SELECT
msg.id,
m.id as senderId,
COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName,
m.platform_id as senderPlatformId,
m.aliases,
@@ -78,7 +78,9 @@ export function searchSessions(
const previewSql = `
SELECT
m.id,
mb.id as senderId,
COALESCE(mb.group_nickname, mb.account_name, mb.platform_id) as senderName,
mb.platform_id as senderPlatformId,
m.content,
m.ts as timestamp
FROM message_context mc
@@ -93,7 +95,9 @@ export function searchSessions(
for (const session of sessions) {
const previewMessages = db.prepare(previewSql).all(session.id, previewCount) as Array<{
id: number
senderId: number
senderName: string
senderPlatformId: string
content: string | null
timestamp: number
}>
@@ -164,7 +168,9 @@ export function getSessionMessages(
const messagesSql = `
SELECT
m.id,
mb.id as senderId,
COALESCE(mb.group_nickname, mb.account_name, mb.platform_id) as senderName,
mb.platform_id as senderPlatformId,
m.content,
m.ts as timestamp
FROM message_context mc
@@ -176,7 +182,9 @@ export function getSessionMessages(
`
const messages = db.prepare(messagesSql).all(chatSessionId, limit) as Array<{
id: number
senderId: number
senderName: string
senderPlatformId: string
content: string | null
timestamp: number
}>
+68 -2
View File
@@ -100,10 +100,37 @@ export interface AgentResult {
toolRounds: number
}
/** 单条脱敏规则 */
export interface DesensitizeRule {
id: string
label: string
pattern: string
replacement: string
enabled: boolean
builtin: boolean
locales: string[]
}
/** 聊天记录预处理配置 */
export interface PreprocessConfig {
dataCleaning: boolean
mergeConsecutive: boolean
mergeWindowSeconds?: number
blacklistKeywords: string[]
denoise: boolean
desensitize: boolean
desensitizeRules: DesensitizeRule[]
anonymizeNames: boolean
}
export interface ToolContext {
sessionId: string
conversationId?: string
timeFilter?: { startTs: number; endTs: number }
maxMessagesLimit?: number
ownerInfo?: { platformId: string; displayName: string }
locale?: string
preprocessConfig?: PreprocessConfig
}
// AI 服务配置类型(前端用)
@@ -463,6 +490,14 @@ export const aiApi = {
showAiLogFile: (): Promise<{ success: boolean; path?: string; error?: string }> => {
return ipcRenderer.invoke('ai:showLogFile')
},
getDefaultDesensitizeRules: (locale: string): Promise<DesensitizeRule[]> => {
return ipcRenderer.invoke('ai:getDefaultDesensitizeRules', locale)
},
mergeDesensitizeRules: (existingRules: DesensitizeRule[], locale: string): Promise<DesensitizeRule[]> => {
return ipcRenderer.invoke('ai:mergeDesensitizeRules', existingRules, locale)
},
}
// ==================== LLM API ====================
@@ -647,12 +682,34 @@ export const agentApi = {
locale?: string,
maxHistoryRounds?: number
): { requestId: string; promise: Promise<{ success: boolean; result?: AgentResult; error?: string }> } => {
// 防御性处理:确保传给 IPC 的 context 是“可结构化克隆”的纯对象
// 避免调用方误传入响应式 Proxy(例如 Pinia/Vue state)导致 invoke 失败
const sanitizedContext: ToolContext = {
sessionId: context.sessionId,
conversationId: context.conversationId,
timeFilter: context.timeFilter
? {
startTs: context.timeFilter.startTs,
endTs: context.timeFilter.endTs,
}
: undefined,
maxMessagesLimit: context.maxMessagesLimit,
ownerInfo: context.ownerInfo
? {
platformId: context.ownerInfo.platformId,
displayName: context.ownerInfo.displayName,
}
: undefined,
locale: context.locale,
preprocessConfig: context.preprocessConfig,
}
const requestId = `agent_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`
console.log(
'[preload] Agent runStream 开始,requestId:',
requestId,
'conversationId:',
context.conversationId ?? 'none',
sanitizedContext.conversationId ?? 'none',
'chatType:',
chatType ?? 'group',
'hasPromptConfig:',
@@ -694,7 +751,16 @@ export const agentApi = {
ipcRenderer.on('agent:complete', completeHandler)
ipcRenderer
.invoke('agent:runStream', requestId, userMessage, context, chatType, promptConfig, locale, maxHistoryRounds)
.invoke(
'agent:runStream',
requestId,
userMessage,
sanitizedContext,
chatType,
promptConfig,
locale,
maxHistoryRounds
)
.then((result) => {
console.log('[preload] Agent invoke 返回:', result)
if (!result.success) {
+31
View File
@@ -373,6 +373,8 @@ interface AiApi {
getMessages: (conversationId: string) => Promise<AIMessage[]>
deleteMessage: (messageId: string) => Promise<boolean>
showAiLogFile: () => Promise<{ success: boolean; path?: string; error?: string }>
getDefaultDesensitizeRules: (locale: string) => Promise<DesensitizeRule[]>
mergeDesensitizeRules: (existingRules: DesensitizeRule[], locale: string) => Promise<DesensitizeRule[]>
// 自定义筛选(支持分页)
filterMessagesWithContext: (
sessionId: string,
@@ -617,6 +619,29 @@ interface OwnerInfo {
displayName: string
}
/** 单条脱敏规则 */
interface DesensitizeRule {
id: string
label: string
pattern: string
replacement: string
enabled: boolean
builtin: boolean
locales: string[]
}
/** 聊天记录预处理配置 */
interface PreprocessConfig {
dataCleaning: boolean
mergeConsecutive: boolean
mergeWindowSeconds?: number
blacklistKeywords: string[]
denoise: boolean
desensitize: boolean
desensitizeRules: DesensitizeRule[]
anonymizeNames: boolean
}
interface ToolContext {
sessionId: string
conversationId?: string
@@ -625,6 +650,10 @@ interface ToolContext {
maxMessagesLimit?: number
/** Owner 信息(当前用户在对话中的身份) */
ownerInfo?: OwnerInfo
/** 语言环境 */
locale?: string
/** 聊天记录预处理配置 */
preprocessConfig?: PreprocessConfig
}
// 用户自定义提示词配置
@@ -871,6 +900,8 @@ export {
AgentRuntimeStatus,
AgentResult,
ToolContext,
DesensitizeRule,
PreprocessConfig,
PromptConfig,
TokenUsage,
CacheDirectoryInfo,