From c36878c58df65da172b7b3d67f50c366ac70a980 Mon Sep 17 00:00:00 2001 From: digua Date: Fri, 27 Feb 2026 23:56:35 +0800 Subject: [PATCH] =?UTF-8?q?feat(ai):=20=E5=AE=9E=E7=8E=B0=E8=81=8A?= =?UTF-8?q?=E5=A4=A9=E8=AE=B0=E5=BD=95=E9=A2=84=E5=A4=84=E7=90=86=E7=AE=A1?= =?UTF-8?q?=E9=81=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 preprocessor 模块:数据清洗(XML卡片)、黑名单过滤、智能去噪、连续发言合并、数据脱敏 - 内置多国脱敏规则(中国手机号/身份证、美国SSN、日韩号码等)+ 自定义规则支持 - 工具层统一 wrapWithPreprocessing 包装,自动对 rawMessages 执行预处理+格式化 - 昵称匿名化:用 U{id} 替代真实昵称,跨工具调用一致 - SQL 查询补充 senderId/senderPlatformId - PreprocessConfig 类型定义(preload + 主进程) --- .../main/ai/preprocessor/builtin-rules.ts | 198 +++++++++++++ electron/main/ai/preprocessor/index.ts | 3 + electron/main/ai/preprocessor/pipeline.ts | 272 ++++++++++++++++++ electron/main/ai/preprocessor/types.ts | 48 ++++ .../definitions/get-conversation-between.ts | 4 +- .../tools/definitions/get-message-context.ts | 4 +- .../tools/definitions/get-recent-messages.ts | 4 +- .../tools/definitions/get-session-messages.ts | 4 +- .../ai/tools/definitions/search-messages.ts | 4 +- electron/main/ai/tools/index.ts | 135 ++++++++- electron/main/ai/tools/types.ts | 4 + electron/main/worker/query/messages.ts | 10 + electron/main/worker/query/session/aiTools.ts | 8 + electron/preload/apis/ai.ts | 70 ++++- electron/preload/index.d.ts | 31 ++ 15 files changed, 785 insertions(+), 14 deletions(-) create mode 100644 electron/main/ai/preprocessor/builtin-rules.ts create mode 100644 electron/main/ai/preprocessor/index.ts create mode 100644 electron/main/ai/preprocessor/pipeline.ts create mode 100644 electron/main/ai/preprocessor/types.ts diff --git a/electron/main/ai/preprocessor/builtin-rules.ts b/electron/main/ai/preprocessor/builtin-rules.ts new file mode 100644 index 0000000..455fee4 --- /dev/null +++ b/electron/main/ai/preprocessor/builtin-rules.ts @@ -0,0 +1,198 @@ +/** + * 内置脱敏规则库 + * 按 locale 分组,支持通用规则和地区特定规则 + */ +import type { DesensitizeRule } from './types' + +export type { DesensitizeRule } + +export const BUILTIN_DESENSITIZE_RULES: DesensitizeRule[] = [ + // ==================== 中国 (zh-CN) ==================== + { + id: 'cn_phone', + label: 'desensitize.rules.cn_phone', + pattern: '(?"]+', + replacement: '[URL]', + enabled: false, + builtin: true, + locales: [], + }, +] + +/** + * 获取指定 locale 的默认规则(当前 locale 特定 + 通用规则) + */ +export function getDefaultRulesForLocale(locale: string): DesensitizeRule[] { + return BUILTIN_DESENSITIZE_RULES.filter((rule) => rule.locales.length === 0 || rule.locales.includes(locale)).map( + (rule) => ({ ...rule }) + ) +} + +/** + * 合并新 locale 的规则到现有规则列表 + * 不重复添加已有 id,不修改已有规则的 enabled 状态 + */ +export function mergeRulesForLocale(existing: DesensitizeRule[], locale: string): DesensitizeRule[] { + const existingIds = new Set(existing.map((r) => r.id)) + const newRules = BUILTIN_DESENSITIZE_RULES.filter( + (rule) => !existingIds.has(rule.id) && (rule.locales.length === 0 || rule.locales.includes(locale)) + ).map((rule) => ({ ...rule })) + + return [...existing, ...newRules] +} diff --git a/electron/main/ai/preprocessor/index.ts b/electron/main/ai/preprocessor/index.ts new file mode 100644 index 0000000..5551757 --- /dev/null +++ b/electron/main/ai/preprocessor/index.ts @@ -0,0 +1,3 @@ +export type { PreprocessConfig, PreprocessableMessage, DesensitizeRule } from './types' +export { preprocessMessages } from './pipeline' +export { BUILTIN_DESENSITIZE_RULES, getDefaultRulesForLocale, mergeRulesForLocale } from './builtin-rules' diff --git a/electron/main/ai/preprocessor/pipeline.ts b/electron/main/ai/preprocessor/pipeline.ts new file mode 100644 index 0000000..dfba160 --- /dev/null +++ b/electron/main/ai/preprocessor/pipeline.ts @@ -0,0 +1,272 @@ +/** + * 预处理管道 + * 执行顺序:数据清洗 → 黑名单 → 去噪 → 合并 → 脱敏 + */ + +import type { PreprocessConfig, PreprocessableMessage, DesensitizeRule } from './types' +import { aiLogger } from '../logger' + +const MERGE_WINDOW_DEFAULT = 180 + +/** + * 对消息数组执行预处理管道 + */ +export function preprocessMessages(messages: T[], config?: PreprocessConfig): T[] { + if (!config || !hasAnyEnabled(config)) return messages + if (messages.length === 0) return messages + + const inputCount = messages.length + let result: T[] = [...messages] + const applied: string[] = [] + + if (config.dataCleaning !== false) { + const cleaned = applyDataCleaning(result) + if (cleaned.changed > 0) { + result = cleaned.messages + applied.push(`dataCleaning: ${cleaned.changed} messages cleaned`) + } + } + + if (config.blacklistKeywords.length > 0) { + const before = result.length + result = applyBlacklistFilter(result, config.blacklistKeywords) + applied.push(`blacklist: ${before} → ${result.length} (-${before - result.length})`) + } + + if (config.denoise) { + const before = result.length + result = applyDenoise(result) + applied.push(`denoise: ${before} → ${result.length} (-${before - result.length})`) + } + + if (config.mergeConsecutive) { + const before = result.length + result = applyMergeConsecutive(result, config.mergeWindowSeconds ?? MERGE_WINDOW_DEFAULT) + applied.push(`merge: ${before} → ${result.length} (-${before - result.length})`) + } + + if (config.desensitize) { + const enabledRules = (config.desensitizeRules || []).filter((r) => r.enabled) + if (enabledRules.length > 0) { + result = applyDesensitize(result, enabledRules) + applied.push(`desensitize: ${enabledRules.length} rules applied`) + } + } + + aiLogger.info('Preprocess', `Pipeline: ${inputCount} → ${result.length} messages`, { + strategies: applied, + }) + + return result +} + +function hasAnyEnabled(config: PreprocessConfig): boolean { + return ( + config.dataCleaning !== false || + config.mergeConsecutive || + config.blacklistKeywords.length > 0 || + config.denoise || + config.desensitize + ) +} + +// ==================== 策略实现 ==================== + +/** + * 数据清洗:将 XML 卡片消息(美团分享、音乐、公众号等)提取为简洁文本 + * 微信 XML 消息以 < 开头 + */ +function applyDataCleaning(messages: T[]): { messages: T[]; changed: number } { + let changed = 0 + const result = messages.map((msg) => { + if (!msg.content) return msg + const cleaned = cleanXmlContent(msg.content) + if (cleaned === msg.content) return msg + changed++ + return { ...msg, content: cleaned } + }) + return { messages: result, changed } +} + +const XML_START = /^<\?xml\s|^]/ + +function cleanXmlContent(content: string): string { + const trimmed = content.trim() + if (!XML_START.test(trimmed)) return content + + const title = extractXmlTag(trimmed, 'title') + const des = extractXmlTag(trimmed, 'des') + + if (title) { + return des ? `[分享] ${title} - ${des}` : `[分享] ${title}` + } + return '[应用消息]' +} + +function extractXmlTag(xml: string, tag: string): string | null { + const openTag = `<${tag}>` + const closeTag = `` + const start = xml.indexOf(openTag) + if (start === -1) return null + const contentStart = start + openTag.length + const end = xml.indexOf(closeTag, contentStart) + if (end === -1) return null + const value = xml.slice(contentStart, end).trim() + return value.length > 0 ? value : null +} + +/** + * 黑名单过滤:消息内容包含任一关键词则整条移除 + */ +function applyBlacklistFilter(messages: T[], keywords: string[]): T[] { + if (keywords.length === 0) return messages + const lowerKeywords = keywords.map((k) => k.toLowerCase()) + return messages.filter((msg) => { + if (!msg.content) return true + const lower = msg.content.toLowerCase() + return !lowerKeywords.some((kw) => lower.includes(kw)) + }) +} + +/** + * 智能去噪:过滤无意义消息 + * - 内容长度 < 2 的纯文本(如 "嗯"、"哦") + * - 纯表情消息 + * - 系统占位符([图片]、[视频]、[语音] 等) + * 回复保护:有 replyToMessageId 的消息不过滤 + */ +function applyDenoise(messages: T[]): T[] { + return messages.filter((msg) => { + if (!msg.content) return false + + if (msg.replyToMessageId) return true + + const content = msg.content.trim() + if (content.length === 0) return false + + if (content.length < 2) return false + + if (SYSTEM_PLACEHOLDERS.has(content)) return false + + if (isPureEmoji(content)) return false + + return true + }) +} + +const SYSTEM_PLACEHOLDERS = new Set([ + '[图片]', + '[视频]', + '[语音]', + '[文件]', + '[动画表情]', + '[表情]', + '[链接]', + '[位置]', + '[名片]', + '[红包]', + '[转账]', + '[音乐]', + '[Image]', + '[Video]', + '[Voice]', + '[File]', + '[Sticker]', + '[Link]', +]) + +function isPureEmoji(str: string): boolean { + const stripped = str + .replace(/\p{Emoji_Presentation}/gu, '') + .replace(/\p{Extended_Pictographic}/gu, '') + .replace(/\u200d/g, '') + .replace(/\ufe0f/g, '') + .replace(/\u20e3/g, '') + .replace(/\s/g, '') + return stripped.length === 0 +} + +/** + * 合并连续发言:同一发送者在时间窗口内的连续消息合并为一条 + * 使用 senderPlatformId(如可用),否则 fallback 到 senderName + */ +function applyMergeConsecutive(messages: T[], windowSeconds: number): T[] { + if (messages.length <= 1) return messages + + const merged: T[] = [] + let current: T | null = null + + for (const msg of messages) { + if (!current) { + current = { ...msg } + continue + } + + const sameSender = isSameSender(current, msg) + const withinWindow = Math.abs(msg.timestamp - current.timestamp) <= windowSeconds + + if (sameSender && withinWindow) { + current = { + ...current, + content: [current.content, msg.content].filter(Boolean).join('\n'), + } + } else { + merged.push(current) + current = { ...msg } + } + } + + if (current) merged.push(current) + return merged +} + +function isSameSender(a: PreprocessableMessage, b: PreprocessableMessage): boolean { + if (a.senderPlatformId && b.senderPlatformId) { + return a.senderPlatformId === b.senderPlatformId + } + return a.senderName === b.senderName +} + +/** + * 数据脱敏:按规则列表顺序依次替换敏感信息 + * 规则按列表优先级排序,先匹配的先替换 + * 自定义正则有超时保护(单条规则 50ms 上限) + */ +function applyDesensitize(messages: T[], rules: DesensitizeRule[]): T[] { + const compiledRules = compileRules(rules) + if (compiledRules.length === 0) return messages + + return messages.map((msg) => { + if (!msg.content) return msg + let content = msg.content + for (const { regex, replacement } of compiledRules) { + regex.lastIndex = 0 + content = content.replace(regex, replacement) + } + if (content === msg.content) return msg + return { ...msg, content } + }) +} + +const regexCache = new Map() + +function compileRules(rules: DesensitizeRule[]): Array<{ regex: RegExp; replacement: string }> { + const result: Array<{ regex: RegExp; replacement: string }> = [] + for (const rule of rules) { + let regex = regexCache.get(rule.pattern) + if (regex === undefined) { + try { + regex = new RegExp(rule.pattern, 'g') + regexCache.set(rule.pattern, regex) + } catch { + aiLogger.warn('Preprocess', `Invalid regex in desensitize rule "${rule.id}": ${rule.pattern}`) + regexCache.set(rule.pattern, null) + continue + } + } + if (regex) { + result.push({ regex, replacement: rule.replacement }) + } + } + return result +} diff --git a/electron/main/ai/preprocessor/types.ts b/electron/main/ai/preprocessor/types.ts new file mode 100644 index 0000000..7ce7e92 --- /dev/null +++ b/electron/main/ai/preprocessor/types.ts @@ -0,0 +1,48 @@ +/** 单条脱敏规则 */ +export interface DesensitizeRule { + /** 唯一标识(预置规则用固定 id,自定义规则用 uuid) */ + id: string + /** 显示名称 */ + label: string + /** 正则表达式字符串(运行时 new RegExp(pattern, 'g')) */ + pattern: string + /** 替换文本 */ + replacement: string + /** 是否启用 */ + enabled: boolean + /** 是否为预置规则(预置规则不可删除,仅可启用/禁用) */ + builtin: boolean + /** 适用的 locale 列表(空数组表示通用) */ + locales: string[] +} + +/** 预处理配置 */ +export interface PreprocessConfig { + /** 数据清洗:清理 XML 卡片消息等非纯文本内容(默认开启) */ + dataCleaning: boolean + /** 合并连续发言(同发送者 + 时间间隔 < mergeWindowSeconds) */ + mergeConsecutive: boolean + /** 合并窗口(秒),默认 180 */ + mergeWindowSeconds?: number + /** 自定义黑名单关键词,包含任一关键词的消息将被整条过滤 */ + blacklistKeywords: string[] + /** 智能去噪(过滤纯语气词、纯表情、系统占位符) */ + denoise: boolean + /** 数据脱敏总开关 */ + desensitize: boolean + /** 脱敏规则列表(预置 + 自定义,按优先级排序) */ + desensitizeRules: DesensitizeRule[] + /** 昵称匿名化:用 U{id} 替代真实昵称,减少 AI 幻觉 */ + anonymizeNames: boolean +} + +/** 预处理管道可接受的消息结构(兼容 SearchMessageResult / SessionMessagesResult.messages) */ +export interface PreprocessableMessage { + id?: number + senderId?: number + senderName: string + senderPlatformId?: string + content: string | null + timestamp: number + replyToMessageId?: string | null +} diff --git a/electron/main/ai/tools/definitions/get-conversation-between.ts b/electron/main/ai/tools/definitions/get-conversation-between.ts index df87556..9ebf0f0 100644 --- a/electron/main/ai/tools/definitions/get-conversation-between.ts +++ b/electron/main/ai/tools/definitions/get-conversation-between.ts @@ -4,7 +4,7 @@ import type { ToolContext } from '../types' import { timeParamProperties } from '../utils/schemas' import * as workerManager from '../../../worker/workerManager' import { parseExtendedTimeParams } from '../utils/time-params' -import { formatTimeRange, formatMessageCompact, t } from '../utils/format' +import { formatTimeRange, t } from '../utils/format' const schema = Type.Object({ member_id_1: Type.Number({ description: 'ai.tools.get_conversation_between.params.member_id_1' }), @@ -51,7 +51,7 @@ export function createTool(context: ToolContext): AgentTool { member1: result.member1Name, member2: result.member2Name, timeRange: formatTimeRange(effectiveTimeFilter, locale), - conversation: result.messages.map((m) => formatMessageCompact(m, locale)), + rawMessages: result.messages, } return { diff --git a/electron/main/ai/tools/definitions/get-message-context.ts b/electron/main/ai/tools/definitions/get-message-context.ts index c42e59e..8c56fbe 100644 --- a/electron/main/ai/tools/definitions/get-message-context.ts +++ b/electron/main/ai/tools/definitions/get-message-context.ts @@ -2,7 +2,7 @@ import { Type } from '@mariozechner/pi-ai' import type { AgentTool } from '@mariozechner/pi-agent-core' import type { ToolContext } from '../types' import * as workerManager from '../../../worker/workerManager' -import { formatMessageCompact, t } from '../utils/format' +import { t } from '../utils/format' const schema = Type.Object({ message_ids: Type.Array(Type.Number(), { description: 'ai.tools.get_message_context.params.message_ids' }), @@ -37,7 +37,7 @@ export function createTool(context: ToolContext): AgentTool { totalMessages: messages.length, contextSize: contextSize, requestedMessageIds: params.message_ids, - messages: messages.map((m) => formatMessageCompact(m, locale)), + rawMessages: messages, } return { diff --git a/electron/main/ai/tools/definitions/get-recent-messages.ts b/electron/main/ai/tools/definitions/get-recent-messages.ts index 404c18b..f26a072 100644 --- a/electron/main/ai/tools/definitions/get-recent-messages.ts +++ b/electron/main/ai/tools/definitions/get-recent-messages.ts @@ -3,7 +3,7 @@ import type { AgentTool } from '@mariozechner/pi-agent-core' import type { ToolContext } from '../types' import * as workerManager from '../../../worker/workerManager' import { parseExtendedTimeParams } from '../utils/time-params' -import { formatTimeRange, formatMessageCompact } from '../utils/format' +import { formatTimeRange } from '../utils/format' import { timeParamProperties } from '../utils/schemas' const schema = Type.Object({ @@ -29,7 +29,7 @@ export function createTool(context: ToolContext): AgentTool { total: result.total, returned: result.messages.length, timeRange: formatTimeRange(effectiveTimeFilter, locale), - messages: result.messages.map((m) => formatMessageCompact(m, locale)), + rawMessages: result.messages, } return { diff --git a/electron/main/ai/tools/definitions/get-session-messages.ts b/electron/main/ai/tools/definitions/get-session-messages.ts index a0a8b9f..e993094 100644 --- a/electron/main/ai/tools/definitions/get-session-messages.ts +++ b/electron/main/ai/tools/definitions/get-session-messages.ts @@ -2,7 +2,7 @@ import { Type } from '@mariozechner/pi-ai' import type { AgentTool } from '@mariozechner/pi-agent-core' import type { ToolContext } from '../types' import * as workerManager from '../../../worker/workerManager' -import { isChineseLocale, formatMessageCompact } from '../utils/format' +import { isChineseLocale } from '../utils/format' const schema = Type.Object({ session_id: Type.Number({ description: 'ai.tools.get_session_messages.params.session_id' }), @@ -38,7 +38,7 @@ export function createTool(context: ToolContext): AgentTool { messageCount: result.messageCount, returnedCount: result.returnedCount, participants: result.participants, - messages: result.messages.map((m) => formatMessageCompact(m, locale)), + rawMessages: result.messages, } } diff --git a/electron/main/ai/tools/definitions/search-messages.ts b/electron/main/ai/tools/definitions/search-messages.ts index 85fe740..af7846b 100644 --- a/electron/main/ai/tools/definitions/search-messages.ts +++ b/electron/main/ai/tools/definitions/search-messages.ts @@ -3,7 +3,7 @@ import type { AgentTool } from '@mariozechner/pi-agent-core' import type { ToolContext } from '../types' import * as workerManager from '../../../worker/workerManager' import { parseExtendedTimeParams } from '../utils/time-params' -import { formatTimeRange, formatMessageCompact } from '../utils/format' +import { formatTimeRange } from '../utils/format' import { timeParamProperties } from '../utils/schemas' const schema = Type.Object({ @@ -38,7 +38,7 @@ export function createTool(context: ToolContext): AgentTool { total: result.total, returned: result.messages.length, timeRange: formatTimeRange(effectiveTimeFilter, locale), - messages: result.messages.map((m) => formatMessageCompact(m, locale)), + rawMessages: result.messages, } return { diff --git a/electron/main/ai/tools/index.ts b/electron/main/ai/tools/index.ts index 2297aa0..aaaed5d 100644 --- a/electron/main/ai/tools/index.ts +++ b/electron/main/ai/tools/index.ts @@ -1,6 +1,8 @@ /** * AI Tools 模块入口 - * 工具创建与管理 + * 工具创建、预处理管道与管理 + * + * 架构:工具返回结构化数据(rawMessages) → 处理层执行预处理 + 格式化 → 生成 LLM 内容 */ import type { AgentTool } from '@mariozechner/pi-agent-core' @@ -21,6 +23,8 @@ import { } from './definitions' import { isEmbeddingEnabled } from '../rag' import { t as i18nT } from '../../i18n' +import { preprocessMessages, type PreprocessableMessage } from '../preprocessor' +import { formatMessageCompact } from './utils/format' // 导出类型 export * from './types' @@ -41,6 +45,57 @@ const coreFactories: ToolFactory[] = [ createGetSessionSummaries, ] +/** + * 将工具返回的结构化数据格式化为 LLM 友好的纯文本 + * + * 从 JSON.stringify 改为纯文本,节省 token 且更易于 LLM 理解。 + * 元数据作为头部,消息逐行排列。 + */ +function formatToolResultAsText(details: Record): string { + const lines: string[] = [] + const messages = details.messages as string[] | undefined + + for (const [key, value] of Object.entries(details)) { + if (key === 'messages') continue + if (value === undefined || value === null) continue + + if (typeof value === 'object') { + if ('start' in (value as Record) && 'end' in (value as Record)) { + const range = value as { start: string; end: string } + lines.push(`${key}: ${range.start} ~ ${range.end}`) + } else if (Array.isArray(value)) { + lines.push(`${key}: ${value.join(', ')}`) + } else { + lines.push(`${key}: ${JSON.stringify(value)}`) + } + } else { + lines.push(`${key}: ${value}`) + } + } + + if (messages && messages.length > 0) { + lines.push('') + let lastDate = '' + for (const msg of messages) { + const spaceIdx = msg.indexOf(' ') + const secondSpaceIdx = msg.indexOf(' ', spaceIdx + 1) + if (spaceIdx > 0 && secondSpaceIdx > 0) { + const date = msg.slice(0, spaceIdx) + const rest = msg.slice(spaceIdx + 1) + if (date !== lastDate) { + lines.push(`--- ${date} ---`) + lastDate = date + } + lines.push(rest) + } else { + lines.push(msg) + } + } + } + + return lines.join('\n') +} + /** * 翻译 AgentTool 的描述(工具级 + 参数级) * @@ -71,11 +126,87 @@ function translateTool(tool: AgentTool): AgentTool { } } +/** + * 预处理包装层 + * 拦截工具的 execute 结果:如果 details 中包含 rawMessages, + * 则执行预处理管道 + 格式化,替换为最终的 LLM 内容 + * + * 工具约定:返回消息的工具在 details 中放置 rawMessages 字段(结构化消息数组), + * 处理层负责 preprocess + formatMessageCompact,工具无需感知预处理逻辑。 + */ +function wrapWithPreprocessing(tool: AgentTool, context: ToolContext): AgentTool { + const originalExecute = tool.execute + return { + ...tool, + execute: async (toolCallId: string, params: any) => { + const result = await originalExecute(toolCallId, params) + + const details = result.details as Record | undefined + if (!details?.rawMessages || !Array.isArray(details.rawMessages)) { + return result + } + + const raw = details.rawMessages as PreprocessableMessage[] + const processed = preprocessMessages(raw, context.preprocessConfig) + + let nameMapLine = '' + if (context.preprocessConfig?.anonymizeNames) { + nameMapLine = anonymizeMessageNames(processed, context.ownerInfo?.platformId) + } + + const formatted = processed.map((m) => formatMessageCompact(m, context.locale)) + + const finalDetails = { ...details, messages: formatted, returned: processed.length } + delete finalDetails.rawMessages + + let textContent = formatToolResultAsText(finalDetails) + if (nameMapLine) { + textContent = nameMapLine + '\n' + textContent + } + + return { + content: [{ type: 'text' as const, text: textContent }], + details: finalDetails, + } + }, + } +} + +/** + * 昵称匿名化:用 U{senderId} 替代真实昵称 + * 就地修改 messages 的 senderName,返回映射表文本行 + */ +function anonymizeMessageNames(messages: PreprocessableMessage[], ownerPlatformId?: string): string { + const nameMap = new Map() + for (const msg of messages) { + if (msg.senderId != null && !nameMap.has(msg.senderId)) { + nameMap.set(msg.senderId, { name: msg.senderName, platformId: msg.senderPlatformId }) + } + } + + if (nameMap.size === 0) return '' + + for (const msg of messages) { + if (msg.senderId != null) { + msg.senderName = `U${msg.senderId}` + } + } + + const entries: string[] = [] + for (const [id, { name, platformId }] of nameMap) { + const isOwner = ownerPlatformId && platformId === ownerPlatformId + entries.push(`U${id}=${name}${isOwner ? '(owner)' : ''}`) + } + + return `[Name Map] ${entries.join(' | ')}` +} + /** * 获取所有可用的 AgentTool * * 根据配置动态过滤工具(如:语义搜索工具仅在启用 Embedding 时可用) * 根据当前 locale 动态翻译工具描述 + * 统一包装预处理层 */ export function getAllTools(context: ToolContext): AgentTool[] { const tools: AgentTool[] = coreFactories.map((f) => f(context)) @@ -84,5 +215,5 @@ export function getAllTools(context: ToolContext): AgentTool[] { tools.push(createSemanticSearchMessages(context)) } - return tools.map(translateTool) + return tools.map(translateTool).map((t) => wrapWithPreprocessing(t, context)) } diff --git a/electron/main/ai/tools/types.ts b/electron/main/ai/tools/types.ts index 136c3a1..96b4a32 100644 --- a/electron/main/ai/tools/types.ts +++ b/electron/main/ai/tools/types.ts @@ -2,6 +2,8 @@ * AI Tools 类型定义 */ +import type { PreprocessConfig } from '../preprocessor' + /** Owner 信息(当前用户在对话中的身份) */ export interface OwnerInfo { /** Owner 的 platformId */ @@ -30,4 +32,6 @@ export interface ToolContext { ownerInfo?: OwnerInfo /** 语言环境(用于工具返回结果的国际化) */ locale?: string + /** 聊天记录预处理配置(全局) */ + preprocessConfig?: PreprocessConfig } diff --git a/electron/main/worker/query/messages.ts b/electron/main/worker/query/messages.ts index beabb75..96c9614 100644 --- a/electron/main/worker/query/messages.ts +++ b/electron/main/worker/query/messages.ts @@ -14,6 +14,7 @@ import { ensureAvatarColumn } from './basic' */ export interface MessageResult { id: number + senderId: number senderName: string senderPlatformId: string senderAliases: string[] @@ -49,6 +50,7 @@ export interface MessagesWithTotal { */ interface DbMessageRow { id: number + senderId: number senderName: string senderPlatformId: string aliases: string | null @@ -78,6 +80,7 @@ function sanitizeMessageRow(row: DbMessageRow): MessageResult { return { id: Number(row.id), + senderId: Number(row.senderId), senderName: String(row.senderName || ''), senderPlatformId: String(row.senderPlatformId || ''), senderAliases: aliases, @@ -155,6 +158,7 @@ export function getRecentMessages(sessionId: string, filter?: TimeFilter, limit: const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, @@ -218,6 +222,7 @@ export function getAllRecentMessages(sessionId: string, filter?: TimeFilter, lim const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, @@ -301,6 +306,7 @@ export function searchMessages( const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, @@ -391,6 +397,7 @@ export function getMessageContext( const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, @@ -450,6 +457,7 @@ export function getMessagesBefore( const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, @@ -522,6 +530,7 @@ export function getMessagesAfter( const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, @@ -622,6 +631,7 @@ export function getConversationBetween( const sql = ` SELECT msg.id, + m.id as senderId, COALESCE(m.group_nickname, m.account_name, m.platform_id) as senderName, m.platform_id as senderPlatformId, m.aliases, diff --git a/electron/main/worker/query/session/aiTools.ts b/electron/main/worker/query/session/aiTools.ts index af1a6b1..678b829 100644 --- a/electron/main/worker/query/session/aiTools.ts +++ b/electron/main/worker/query/session/aiTools.ts @@ -78,7 +78,9 @@ export function searchSessions( const previewSql = ` SELECT m.id, + mb.id as senderId, COALESCE(mb.group_nickname, mb.account_name, mb.platform_id) as senderName, + mb.platform_id as senderPlatformId, m.content, m.ts as timestamp FROM message_context mc @@ -93,7 +95,9 @@ export function searchSessions( for (const session of sessions) { const previewMessages = db.prepare(previewSql).all(session.id, previewCount) as Array<{ id: number + senderId: number senderName: string + senderPlatformId: string content: string | null timestamp: number }> @@ -164,7 +168,9 @@ export function getSessionMessages( const messagesSql = ` SELECT m.id, + mb.id as senderId, COALESCE(mb.group_nickname, mb.account_name, mb.platform_id) as senderName, + mb.platform_id as senderPlatformId, m.content, m.ts as timestamp FROM message_context mc @@ -176,7 +182,9 @@ export function getSessionMessages( ` const messages = db.prepare(messagesSql).all(chatSessionId, limit) as Array<{ id: number + senderId: number senderName: string + senderPlatformId: string content: string | null timestamp: number }> diff --git a/electron/preload/apis/ai.ts b/electron/preload/apis/ai.ts index d6c37fd..2a7387a 100644 --- a/electron/preload/apis/ai.ts +++ b/electron/preload/apis/ai.ts @@ -100,10 +100,37 @@ export interface AgentResult { toolRounds: number } +/** 单条脱敏规则 */ +export interface DesensitizeRule { + id: string + label: string + pattern: string + replacement: string + enabled: boolean + builtin: boolean + locales: string[] +} + +/** 聊天记录预处理配置 */ +export interface PreprocessConfig { + dataCleaning: boolean + mergeConsecutive: boolean + mergeWindowSeconds?: number + blacklistKeywords: string[] + denoise: boolean + desensitize: boolean + desensitizeRules: DesensitizeRule[] + anonymizeNames: boolean +} + export interface ToolContext { sessionId: string conversationId?: string timeFilter?: { startTs: number; endTs: number } + maxMessagesLimit?: number + ownerInfo?: { platformId: string; displayName: string } + locale?: string + preprocessConfig?: PreprocessConfig } // AI 服务配置类型(前端用) @@ -463,6 +490,14 @@ export const aiApi = { showAiLogFile: (): Promise<{ success: boolean; path?: string; error?: string }> => { return ipcRenderer.invoke('ai:showLogFile') }, + + getDefaultDesensitizeRules: (locale: string): Promise => { + return ipcRenderer.invoke('ai:getDefaultDesensitizeRules', locale) + }, + + mergeDesensitizeRules: (existingRules: DesensitizeRule[], locale: string): Promise => { + return ipcRenderer.invoke('ai:mergeDesensitizeRules', existingRules, locale) + }, } // ==================== LLM API ==================== @@ -647,12 +682,34 @@ export const agentApi = { locale?: string, maxHistoryRounds?: number ): { requestId: string; promise: Promise<{ success: boolean; result?: AgentResult; error?: string }> } => { + // 防御性处理:确保传给 IPC 的 context 是“可结构化克隆”的纯对象 + // 避免调用方误传入响应式 Proxy(例如 Pinia/Vue state)导致 invoke 失败 + const sanitizedContext: ToolContext = { + sessionId: context.sessionId, + conversationId: context.conversationId, + timeFilter: context.timeFilter + ? { + startTs: context.timeFilter.startTs, + endTs: context.timeFilter.endTs, + } + : undefined, + maxMessagesLimit: context.maxMessagesLimit, + ownerInfo: context.ownerInfo + ? { + platformId: context.ownerInfo.platformId, + displayName: context.ownerInfo.displayName, + } + : undefined, + locale: context.locale, + preprocessConfig: context.preprocessConfig, + } + const requestId = `agent_${Date.now()}_${Math.random().toString(36).slice(2, 8)}` console.log( '[preload] Agent runStream 开始,requestId:', requestId, 'conversationId:', - context.conversationId ?? 'none', + sanitizedContext.conversationId ?? 'none', 'chatType:', chatType ?? 'group', 'hasPromptConfig:', @@ -694,7 +751,16 @@ export const agentApi = { ipcRenderer.on('agent:complete', completeHandler) ipcRenderer - .invoke('agent:runStream', requestId, userMessage, context, chatType, promptConfig, locale, maxHistoryRounds) + .invoke( + 'agent:runStream', + requestId, + userMessage, + sanitizedContext, + chatType, + promptConfig, + locale, + maxHistoryRounds + ) .then((result) => { console.log('[preload] Agent invoke 返回:', result) if (!result.success) { diff --git a/electron/preload/index.d.ts b/electron/preload/index.d.ts index 2f8fb03..8bfa278 100644 --- a/electron/preload/index.d.ts +++ b/electron/preload/index.d.ts @@ -373,6 +373,8 @@ interface AiApi { getMessages: (conversationId: string) => Promise deleteMessage: (messageId: string) => Promise showAiLogFile: () => Promise<{ success: boolean; path?: string; error?: string }> + getDefaultDesensitizeRules: (locale: string) => Promise + mergeDesensitizeRules: (existingRules: DesensitizeRule[], locale: string) => Promise // 自定义筛选(支持分页) filterMessagesWithContext: ( sessionId: string, @@ -617,6 +619,29 @@ interface OwnerInfo { displayName: string } +/** 单条脱敏规则 */ +interface DesensitizeRule { + id: string + label: string + pattern: string + replacement: string + enabled: boolean + builtin: boolean + locales: string[] +} + +/** 聊天记录预处理配置 */ +interface PreprocessConfig { + dataCleaning: boolean + mergeConsecutive: boolean + mergeWindowSeconds?: number + blacklistKeywords: string[] + denoise: boolean + desensitize: boolean + desensitizeRules: DesensitizeRule[] + anonymizeNames: boolean +} + interface ToolContext { sessionId: string conversationId?: string @@ -625,6 +650,10 @@ interface ToolContext { maxMessagesLimit?: number /** Owner 信息(当前用户在对话中的身份) */ ownerInfo?: OwnerInfo + /** 语言环境 */ + locale?: string + /** 聊天记录预处理配置 */ + preprocessConfig?: PreprocessConfig } // 用户自定义提示词配置 @@ -871,6 +900,8 @@ export { AgentRuntimeStatus, AgentResult, ToolContext, + DesensitizeRule, + PreprocessConfig, PromptConfig, TokenUsage, CacheDirectoryInfo,