diff --git a/electron/main/ai/agent/event-handler.ts b/electron/main/ai/agent/event-handler.ts index 88ca8bd5..8576f388 100644 --- a/electron/main/ai/agent/event-handler.ts +++ b/electron/main/ai/agent/event-handler.ts @@ -210,6 +210,7 @@ export class AgentEventHandler { private estimateContextTokens(systemPrompt: string, messages: PiMessage[], pendingUserMessage?: string): number { let tokens = this.estimateTokensFromText(systemPrompt) for (const message of messages) { + if (message.role === 'toolResult') continue tokens += this.estimateTokensFromText(this.extractMessageText(message)) } if (pendingUserMessage) { diff --git a/electron/main/ai/agent/index.ts b/electron/main/ai/agent/index.ts index ec1e9b02..4e2745a0 100644 --- a/electron/main/ai/agent/index.ts +++ b/electron/main/ai/agent/index.ts @@ -23,7 +23,7 @@ import { buildSystemPrompt } from './prompt-builder' import { extractThinkingContent, stripToolCallTags } from './content-parser' import { AgentEventHandler } from './event-handler' -type SimpleHistoryMessage = { role: 'user' | 'assistant'; content: string } +type SimpleHistoryMessage = { role: 'user' | 'assistant' | 'summary'; content: string } // Re-export types for external consumers export type { AgentConfig, AgentStreamChunk, AgentResult, TokenUsage, AgentRuntimeStatus, SkillContext } from './types' @@ -63,7 +63,6 @@ export class Agent { this.locale = locale this.config = { maxToolRounds: config.maxToolRounds ?? 5, - contextHistoryLimit: config.contextHistoryLimit ?? 48, } } @@ -175,8 +174,7 @@ export class Agent { coreAgent.setTools(maxToolRounds > 0 ? piTools : []) - const limit = this.config.contextHistoryLimit ?? 48 - const historyMessages = this.loadHistory(limit) + const historyMessages = this.loadHistory() coreAgent.replaceMessages(this.toPiHistoryMessages(historyMessages)) handler.emitStatus('preparing', coreAgent.state.messages, { @@ -296,13 +294,13 @@ export class Agent { * 从 SQLite 加载对话历史 * 当 context.conversationId 存在时从 DB 读取,否则返回空数组 */ - private loadHistory(limit: number): SimpleHistoryMessage[] { + private loadHistory(): SimpleHistoryMessage[] { const { conversationId } = this.context if (!conversationId) { return [] } try { - return getHistoryForAgent(conversationId, limit > 0 ? limit : undefined) + return getHistoryForAgent(conversationId) } catch (error) { aiLogger.warn('Agent', 'Failed to load history from DB, using empty history', { conversationId, error }) return [] @@ -330,6 +328,7 @@ export class Agent { } } + // summary 作为 assistant 消息传给 LLM(它是压缩后的上下文摘要) return { role: 'assistant', content: [{ type: 'text', text: msg.content || '' }], diff --git a/electron/main/ai/agent/types.ts b/electron/main/ai/agent/types.ts index 0147dd81..effa5c6f 100644 --- a/electron/main/ai/agent/types.ts +++ b/electron/main/ai/agent/types.ts @@ -12,8 +12,6 @@ export type { TokenUsage, AgentRuntimeStatus, SerializedErrorInfo } from '../../ export interface AgentConfig { /** 最大工具调用轮数(防止无限循环) */ maxToolRounds?: number - /** 注入模型的历史消息上限(user+assistant) */ - contextHistoryLimit?: number /** 中止信号,用于取消执行 */ abortSignal?: AbortSignal } diff --git a/electron/main/ai/compression/index.ts b/electron/main/ai/compression/index.ts new file mode 100644 index 00000000..b3036d4f --- /dev/null +++ b/electron/main/ai/compression/index.ts @@ -0,0 +1,312 @@ +/** + * 上下文压缩服务 + * 在 Agent 推理前同步执行,将过长的对话历史压缩为摘要。 + * + * 核心流程: + * 1. 计算当前上下文总 token → 未超阈值则跳过 + * 2. 确定缓冲区:最近 bufferSizePercent% context window 的消息原文 + * 3. 缓冲区之前的消息(含旧 summary)→ LLM 压缩为新摘要 + * 4. 写入 ai_message(role='summary'),替换旧 summary + * 5. Thrashing 检查 + */ + +import { countTokens, countMessagesTokens } from '../tokenizer' +import { + getLatestSummary, + getMessagesAfterSummary, + getAllUserAssistantMessages, + addSummaryMessage, + getMessageCountAfterSummary, +} from '../conversations' +import { buildPiModel, getActiveConfig, findModelDefinition } from '../llm' +import type { AIServiceConfig } from '../llm/types' +import { completeSimple, type TextContent as PiTextContent } from '@mariozechner/pi-ai' +import { aiLogger } from '../logger' + +// ==================== 类型定义 ==================== + +export interface CompressionConfig { + enabled: boolean + /** 触发压缩的 token 阈值百分比(相对于 context window),默认 75 */ + tokenThresholdPercent: number + /** 保留最近消息的缓冲区大小(相对于 context window 的百分比),默认 20 */ + bufferSizePercent: number + /** 独立压缩模型配置(为空则使用当前对话模型) */ + compressionModelConfigId?: string + /** 单次工具返回的最大上下文占比(相对于 context window 的百分比),默认 35 */ + maxToolResultPercent?: number +} + +export interface CompressionResult { + compressed: boolean + reason: + | 'skipped_disabled' + | 'skipped_below_threshold' + | 'skipped_idempotent' + | 'success' + | 'fallback_truncated' + | 'thrashing' + | 'error' + tokensBefore?: number + tokensAfter?: number + error?: string +} + +const DEFAULT_COMPRESSION_PROMPT = `Please compress the following conversation history into a concise summary, preserving key information, decisions, and context. +Requirements: +- Preserve key facts, data, names, and conclusions +- Preserve user preferences and important instructions +- Preserve time points and important events +- Output in the same language as the conversation +- Keep it within {maxTokens} tokens + +Conversation history: +{messages}` + +const DEFAULT_CONTEXT_WINDOW = 128000 + +// ==================== 核心压缩逻辑 ==================== + +/** + * 检查并执行上下文压缩(同步,在 Agent 推理前调用) + */ +export async function checkAndCompress( + conversationId: string, + config: CompressionConfig, + systemPrompt: string, + activeAIConfig: AIServiceConfig +): Promise { + if (!config.enabled) { + return { compressed: false, reason: 'skipped_disabled' } + } + + try { + const contextWindow = resolveContextWindow(config, activeAIConfig) + const thresholdTokens = Math.floor(contextWindow * (config.tokenThresholdPercent / 100) * 0.95) + + // 收集当前上下文消息 + const summary = getLatestSummary(conversationId) + const messages = summary + ? getMessagesAfterSummary(conversationId, summary.timestamp) + : getAllUserAssistantMessages(conversationId) + + // 构建 token 计算的消息列表 + const historyForTokenCount: Array<{ role: string; content: string }> = [] + if (summary) { + historyForTokenCount.push({ role: 'assistant', content: summary.content }) + } + for (const msg of messages) { + historyForTokenCount.push({ role: msg.role, content: msg.content }) + } + + const currentTokens = countMessagesTokens(historyForTokenCount, systemPrompt) + + aiLogger.info('Compression', `Token check: ${currentTokens} / ${thresholdTokens} (${contextWindow} window)`, { + conversationId, + messageCount: messages.length, + hasSummary: !!summary, + }) + + if (currentTokens < thresholdTokens) { + return { compressed: false, reason: 'skipped_below_threshold', tokensBefore: currentTokens } + } + + // 确定缓冲区(保留最近 N% 的消息) + const bufferTokenBudget = Math.floor(contextWindow * (config.bufferSizePercent / 100)) + const { bufferMessages, messagesToCompress } = splitMessagesForCompression(messages, summary, bufferTokenBudget) + + if (messagesToCompress.length === 0) { + return { compressed: false, reason: 'skipped_below_threshold', tokensBefore: currentTokens } + } + + // 构建压缩输入文本 + const compressInput = buildCompressionInput(messagesToCompress, summary) + const targetTokens = Math.floor(contextWindow * 0.1) + + // 三级降级:独立模型 → 当前模型 → 强制截断 + let summaryText: string | null = null + + // 尝试用配置的压缩模型 + if (config.compressionModelConfigId) { + summaryText = await tryCompress(config.compressionModelConfigId, compressInput, targetTokens) + } + + // 降级到当前模型 + if (!summaryText) { + summaryText = await tryCompressWithConfig(activeAIConfig, compressInput, targetTokens) + } + + // 最终降级:强制截断 + if (!summaryText) { + aiLogger.warn('Compression', 'LLM compression failed, falling back to truncation') + summaryText = forceTruncate(compressInput, targetTokens) + } + + // 写入 summary + addSummaryMessage(conversationId, summaryText) + + // Thrashing 检查:压缩后重新计算 token + const afterMessages = getMessagesAfterSummary(conversationId, Date.now() / 1000 - 1) + const afterTokenCount: Array<{ role: string; content: string }> = [ + { role: 'assistant', content: summaryText }, + ...afterMessages.map((m) => ({ role: m.role, content: m.content })), + ] + const tokensAfter = countMessagesTokens(afterTokenCount, systemPrompt) + + if (tokensAfter >= thresholdTokens) { + aiLogger.warn( + 'Compression', + `Thrashing detected: ${tokensAfter} tokens after compression still >= ${thresholdTokens}` + ) + return { compressed: true, reason: 'thrashing', tokensBefore: currentTokens, tokensAfter } + } + + aiLogger.info('Compression', `Compressed: ${currentTokens} → ${tokensAfter} tokens`) + return { compressed: true, reason: 'success', tokensBefore: currentTokens, tokensAfter } + } catch (error) { + aiLogger.error('Compression', 'Compression failed', { error: String(error) }) + return { compressed: false, reason: 'error', error: String(error) } + } +} + +/** + * 手动压缩(用户手动触发,含幂等检查) + */ +export async function manualCompress( + conversationId: string, + config: CompressionConfig, + systemPrompt: string, + activeAIConfig: AIServiceConfig +): Promise { + const messageCount = getMessageCountAfterSummary(conversationId) + if (messageCount < 5) { + return { compressed: false, reason: 'skipped_idempotent' } + } + + // 手动压缩忽略阈值,强制执行 + const overrideConfig = { ...config, enabled: true, tokenThresholdPercent: 0 } + return checkAndCompress(conversationId, overrideConfig, systemPrompt, activeAIConfig) +} + +// ==================== 内部辅助函数 ==================== + +function resolveContextWindow(_config: CompressionConfig, activeAIConfig: AIServiceConfig): number { + const modelDef = findModelDefinition(activeAIConfig.provider, activeAIConfig.model || '') + return modelDef?.contextWindow ?? DEFAULT_CONTEXT_WINDOW +} + +interface SplitResult { + bufferMessages: Array<{ role: string; content: string; timestamp: number }> + messagesToCompress: Array<{ role: string; content: string; timestamp: number }> +} + +function splitMessagesForCompression( + messages: Array<{ role: string; content: string; timestamp: number }>, + summary: { content: string } | null, + bufferTokenBudget: number +): SplitResult { + let bufferTokens = 0 + let splitIndex = messages.length + + // 从最近的消息向前累计,直到达到缓冲区预算 + for (let i = messages.length - 1; i >= 0; i--) { + const msgTokens = countTokens(messages[i].content) + 4 + if (bufferTokens + msgTokens > bufferTokenBudget) { + splitIndex = i + 1 + break + } + bufferTokens += msgTokens + if (i === 0) { + splitIndex = 0 + } + } + + return { + bufferMessages: messages.slice(splitIndex), + messagesToCompress: messages.slice(0, splitIndex), + } +} + +function buildCompressionInput( + messagesToCompress: Array<{ role: string; content: string }>, + existingSummary: { content: string } | null +): string { + const parts: string[] = [] + + if (existingSummary) { + parts.push(`[Previous Summary]\n${existingSummary.content}\n`) + } + + for (const msg of messagesToCompress) { + const roleLabel = msg.role === 'user' ? 'User' : 'Assistant' + parts.push(`${roleLabel}: ${msg.content}`) + } + + return parts.join('\n\n') +} + +async function tryCompress(configId: string, input: string, targetTokens: number): Promise { + try { + const { getAllConfigs } = await import('../llm') + const allConfigs = getAllConfigs() + const config = allConfigs.find((c) => c.id === configId) + if (!config) return null + + return await tryCompressWithConfig(config, input, targetTokens) + } catch (error) { + aiLogger.warn('Compression', `Compression with config ${configId} failed`, { error: String(error) }) + return null + } +} + +async function tryCompressWithConfig( + aiConfig: AIServiceConfig, + input: string, + targetTokens: number +): Promise { + try { + const piModel = buildPiModel(aiConfig) + const prompt = DEFAULT_COMPRESSION_PROMPT.replace('{maxTokens}', String(targetTokens)).replace('{messages}', input) + + const result = await completeSimple( + piModel, + { + systemPrompt: undefined, + messages: [ + { + role: 'user', + content: [{ type: 'text', text: prompt }], + timestamp: Date.now(), + }, + ] as any, + }, + { + apiKey: aiConfig.apiKey, + maxTokens: targetTokens, + } + ) + + const text = result.content + .filter((item): item is PiTextContent => item.type === 'text') + .map((item) => item.text) + .join('') + + return text || null + } catch (error) { + aiLogger.warn('Compression', 'LLM compression attempt failed', { error: String(error) }) + return null + } +} + +function forceTruncate(input: string, targetTokens: number): string { + const lines = input.split('\n') + const result: string[] = [] + let tokens = 0 + for (const line of lines) { + const lineTokens = countTokens(line) + if (tokens + lineTokens > targetTokens) break + result.push(line) + tokens += lineTokens + } + return result.join('\n') || input.slice(0, targetTokens * 3) +} diff --git a/electron/main/ai/conversations.ts b/electron/main/ai/conversations.ts index 49073bd9..86067b91 100644 --- a/electron/main/ai/conversations.ts +++ b/electron/main/ai/conversations.ts @@ -87,6 +87,12 @@ function migrateAiDatabase(db: Database.Database): void { console.log('[AI DB Migration] Adding content_blocks column') } + // 检查并添加 token_usage 列(JSON: {promptTokens, completionTokens, totalTokens}) + if (!messageColumns.includes('token_usage')) { + db.exec('ALTER TABLE ai_message ADD COLUMN token_usage TEXT') + console.log('[AI DB Migration] Adding token_usage column to ai_message') + } + // 获取 ai_conversation 表的列信息 const convTableInfo = db.pragma('table_info(ai_conversation)') as Array<{ name: string }> const convColumns = convTableInfo.map((col) => col.name) @@ -145,16 +151,26 @@ export type ContentBlock = /** * AI 消息类型 */ +export type AIMessageRole = 'user' | 'assistant' | 'summary' + +export interface TokenUsageData { + promptTokens: number + completionTokens: number + totalTokens: number +} + export interface AIMessage { id: string conversationId: string - role: 'user' | 'assistant' + role: AIMessageRole content: string timestamp: number dataKeywords?: string[] dataMessageCount?: number /** AI 消息的内容块数组(按时序排列的文本和工具调用) */ contentBlocks?: ContentBlock[] + /** 本次 Agent 执行的 token 使用量(仅 assistant 消息) */ + tokenUsage?: TokenUsageData } // ==================== 对话管理 ==================== @@ -283,11 +299,12 @@ export function deleteConversation(conversationId: string): boolean { */ export function addMessage( conversationId: string, - role: 'user' | 'assistant', + role: AIMessageRole, content: string, dataKeywords?: string[], dataMessageCount?: number, - contentBlocks?: ContentBlock[] + contentBlocks?: ContentBlock[], + tokenUsage?: TokenUsageData ): AIMessage { const db = getAiDb() const now = Math.floor(Date.now() / 1000) @@ -295,8 +312,8 @@ export function addMessage( db.prepare( ` - INSERT INTO ai_message (id, conversation_id, role, content, timestamp, data_keywords, data_message_count, content_blocks) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) + INSERT INTO ai_message (id, conversation_id, role, content, timestamp, data_keywords, data_message_count, content_blocks, token_usage) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) ` ).run( id, @@ -306,7 +323,8 @@ export function addMessage( now, dataKeywords ? JSON.stringify(dataKeywords) : null, dataMessageCount ?? null, - contentBlocks ? JSON.stringify(contentBlocks) : null + contentBlocks ? JSON.stringify(contentBlocks) : null, + tokenUsage ? JSON.stringify(tokenUsage) : null ) // 更新对话的 updated_at @@ -321,6 +339,7 @@ export function addMessage( dataKeywords, dataMessageCount, contentBlocks, + tokenUsage, } } @@ -341,7 +360,8 @@ export function getMessages(conversationId: string): AIMessage[] { timestamp, data_keywords as dataKeywords, data_message_count as dataMessageCount, - content_blocks as contentBlocks + content_blocks as contentBlocks, + token_usage as tokenUsage FROM ai_message WHERE conversation_id = ? ORDER BY timestamp ASC @@ -356,17 +376,19 @@ export function getMessages(conversationId: string): AIMessage[] { dataKeywords: string | null dataMessageCount: number | null contentBlocks: string | null + tokenUsage: string | null }> return rows.map((row) => ({ id: row.id, conversationId: row.conversationId, - role: row.role as 'user' | 'assistant', + role: row.role as AIMessageRole, content: row.content, timestamp: row.timestamp, dataKeywords: row.dataKeywords ? JSON.parse(row.dataKeywords) : undefined, dataMessageCount: row.dataMessageCount ?? undefined, contentBlocks: row.contentBlocks ? JSON.parse(row.contentBlocks) : undefined, + tokenUsage: row.tokenUsage ? JSON.parse(row.tokenUsage) : undefined, })) } @@ -379,26 +401,206 @@ export function deleteMessage(messageId: string): boolean { return result.changes > 0 } +/** + * 获取对话的累计 token 使用量(聚合所有 assistant 消息的 token_usage) + */ +export function getConversationTokenUsage(conversationId: string): TokenUsageData { + const db = getAiDb() + const row = db + .prepare( + ` + SELECT + COALESCE(SUM(json_extract(token_usage, '$.promptTokens')), 0) as promptTokens, + COALESCE(SUM(json_extract(token_usage, '$.completionTokens')), 0) as completionTokens, + COALESCE(SUM(json_extract(token_usage, '$.totalTokens')), 0) as totalTokens + FROM ai_message + WHERE conversation_id = ? AND token_usage IS NOT NULL + ` + ) + .get(conversationId) as { promptTokens: number; completionTokens: number; totalTokens: number } + + return { + promptTokens: row.promptTokens, + completionTokens: row.completionTokens, + totalTokens: row.totalTokens, + } +} + // ==================== Agent 专用 ==================== /** * 为 Agent 提供对话历史 * * 返回简化的 {role, content} 格式,按时间升序排列。 + * 当存在 summary 消息时,返回最新 summary + summary 之后的 user/assistant 消息, + * 以避免重复加载已被压缩的旧消息。 + * * @param conversationId 对话 ID - * @param maxMessages 最大返回条数(取最近 N 条) + * @param maxMessages 最大返回条数(取最近 N 条,仅对 summary 之后的消息生效) */ export function getHistoryForAgent( conversationId: string, maxMessages?: number -): Array<{ role: 'user' | 'assistant'; content: string }> { +): Array<{ role: 'user' | 'assistant' | 'summary'; content: string }> { const messages = getMessages(conversationId) - const filtered = messages - .filter((m) => (m.role === 'user' || m.role === 'assistant') && m.content?.trim()) - .map((m) => ({ role: m.role, content: m.content })) + const validMessages = messages.filter( + (m) => (m.role === 'user' || m.role === 'assistant' || m.role === 'summary') && m.content?.trim() + ) - if (maxMessages && filtered.length > maxMessages) { - return filtered.slice(-maxMessages) + // 查找最新的 summary 消息位置 + let summaryIndex = -1 + for (let i = validMessages.length - 1; i >= 0; i--) { + if (validMessages[i].role === 'summary') { + summaryIndex = i + break + } } - return filtered + + let result: Array<{ role: 'user' | 'assistant' | 'summary'; content: string }> + + if (summaryIndex >= 0) { + // 返回 summary + summary 之后的消息 + result = validMessages.slice(summaryIndex).map((m) => ({ role: m.role, content: m.content })) + } else { + result = validMessages.map((m) => ({ role: m.role, content: m.content })) + } + + if (maxMessages && result.length > maxMessages) { + // 如果有 summary 且它是第一条,保留它再截取后面的 + if (result.length > 0 && result[0].role === 'summary') { + const rest = result.slice(1) + const truncated = rest.slice(-(maxMessages - 1)) + return [result[0], ...truncated] + } + return result.slice(-maxMessages) + } + return result +} + +// ==================== Summary / 压缩专用 ==================== + +/** + * 添加 summary 消息并替换旧的 summary(每个对话只保留一条最新 summary) + */ +export function addSummaryMessage(conversationId: string, content: string): AIMessage { + const db = getAiDb() + + // 删除该对话中所有旧的 summary 消息 + db.prepare("DELETE FROM ai_message WHERE conversation_id = ? AND role = 'summary'").run(conversationId) + + return addMessage(conversationId, 'summary', content) +} + +/** + * 获取对话中最新的 summary 消息 + */ +export function getLatestSummary(conversationId: string): AIMessage | null { + const db = getAiDb() + const row = db + .prepare( + ` + SELECT id, conversation_id as conversationId, role, content, timestamp, + data_keywords as dataKeywords, data_message_count as dataMessageCount, content_blocks as contentBlocks + FROM ai_message + WHERE conversation_id = ? AND role = 'summary' + ORDER BY timestamp DESC + LIMIT 1 + ` + ) + .get(conversationId) as + | { + id: string + conversationId: string + role: string + content: string + timestamp: number + dataKeywords: string | null + dataMessageCount: number | null + contentBlocks: string | null + } + | undefined + + if (!row) return null + return { + id: row.id, + conversationId: row.conversationId, + role: row.role as AIMessageRole, + content: row.content, + timestamp: row.timestamp, + dataKeywords: row.dataKeywords ? JSON.parse(row.dataKeywords) : undefined, + dataMessageCount: row.dataMessageCount ?? undefined, + contentBlocks: row.contentBlocks ? JSON.parse(row.contentBlocks) : undefined, + } +} + +/** + * 获取 summary 之后的所有 user/assistant 消息(用于压缩计算) + */ +export function getMessagesAfterSummary( + conversationId: string, + summaryTimestamp: number +): Array<{ role: AIMessageRole; content: string; timestamp: number }> { + const db = getAiDb() + const rows = db + .prepare( + ` + SELECT role, content, timestamp + FROM ai_message + WHERE conversation_id = ? AND timestamp > ? AND role IN ('user', 'assistant') + ORDER BY timestamp ASC + ` + ) + .all(conversationId, summaryTimestamp) as Array<{ + role: string + content: string + timestamp: number + }> + + return rows.map((r) => ({ role: r.role as AIMessageRole, content: r.content, timestamp: r.timestamp })) +} + +/** + * 获取对话中所有 user/assistant 消息(不含 summary,用于首次压缩) + */ +export function getAllUserAssistantMessages( + conversationId: string +): Array<{ role: AIMessageRole; content: string; timestamp: number }> { + const db = getAiDb() + const rows = db + .prepare( + ` + SELECT role, content, timestamp + FROM ai_message + WHERE conversation_id = ? AND role IN ('user', 'assistant') + ORDER BY timestamp ASC + ` + ) + .all(conversationId) as Array<{ + role: string + content: string + timestamp: number + }> + + return rows.map((r) => ({ role: r.role as AIMessageRole, content: r.content, timestamp: r.timestamp })) +} + +/** + * 获取对话中 summary 之后的 user/assistant 消息数量 + */ +export function getMessageCountAfterSummary(conversationId: string): number { + const summary = getLatestSummary(conversationId) + if (!summary) { + const db = getAiDb() + const row = db + .prepare("SELECT COUNT(*) as count FROM ai_message WHERE conversation_id = ? AND role IN ('user', 'assistant')") + .get(conversationId) as { count: number } + return row.count + } + const db = getAiDb() + const row = db + .prepare( + "SELECT COUNT(*) as count FROM ai_message WHERE conversation_id = ? AND timestamp > ? AND role IN ('user', 'assistant')" + ) + .get(conversationId, summary.timestamp) as { count: number } + return row.count } diff --git a/electron/main/ai/tokenizer/index.ts b/electron/main/ai/tokenizer/index.ts new file mode 100644 index 00000000..2f436647 --- /dev/null +++ b/electron/main/ai/tokenizer/index.ts @@ -0,0 +1,47 @@ +/** + * Token 计数模块 + * 使用 js-tiktoken 的 cl100k_base 编码进行近似 token 计数。 + * 该编码是 GPT-4/Claude 系列的近似值,对国内模型有一定误差, + * 因此阈值计算时预留了余量。 + */ + +import { encodingForModel } from 'js-tiktoken' + +let encoder: ReturnType | null = null + +function getEncoder() { + if (!encoder) { + encoder = encodingForModel('gpt-4o') + } + return encoder +} + +/** + * 计算单段文本的 token 数 + */ +export function countTokens(text: string): number { + if (!text) return 0 + return getEncoder().encode(text).length +} + +/** + * 计算消息列表的总 token 数(含 systemPrompt) + * 每条消息额外计 4 tokens 的格式开销(role + 分隔符) + */ +export function countMessagesTokens(messages: Array<{ role: string; content: string }>, systemPrompt?: string): number { + const enc = getEncoder() + let total = 0 + + if (systemPrompt) { + total += enc.encode(systemPrompt).length + 4 + } + + for (const msg of messages) { + total += enc.encode(msg.content).length + 4 + } + + // 回复引导 token + total += 3 + + return total +} diff --git a/electron/main/ai/tools/definitions/index.ts b/electron/main/ai/tools/definitions/index.ts index 858e3dcd..c7fe256c 100644 --- a/electron/main/ai/tools/definitions/index.ts +++ b/electron/main/ai/tools/definitions/index.ts @@ -30,19 +30,34 @@ export { sqlToolEntries } from './sql-analysis' export const TOOL_REGISTRY: ToolRegistryEntry[] = [ // ==================== Core 工具(始终加载) ==================== { name: 'get_chat_overview', factory: createGetChatOverview, category: 'core' }, - { name: 'search_messages', factory: createSearchMessages, category: 'core' }, - { name: 'deep_search_messages', factory: createDeepSearchMessages, category: 'core' }, - { name: 'get_recent_messages', factory: createGetRecentMessages, category: 'core' }, - { name: 'get_message_context', factory: createGetMessageContext, category: 'core' }, + { name: 'search_messages', factory: createSearchMessages, category: 'core', truncationStrategy: 'keep_first' }, + { + name: 'deep_search_messages', + factory: createDeepSearchMessages, + category: 'core', + truncationStrategy: 'keep_first', + }, + { name: 'get_recent_messages', factory: createGetRecentMessages, category: 'core', truncationStrategy: 'keep_last' }, + { name: 'get_message_context', factory: createGetMessageContext, category: 'core', truncationStrategy: 'keep_last' }, { name: 'search_sessions', factory: createSearchSessions, category: 'core' }, - { name: 'get_session_messages', factory: createGetSessionMessages, category: 'core' }, + { + name: 'get_session_messages', + factory: createGetSessionMessages, + category: 'core', + truncationStrategy: 'keep_last', + }, { name: 'get_members', factory: createGetMembers, category: 'core' }, // ==================== Analysis 工具(按需加载) ==================== { name: 'get_member_stats', factory: createGetMemberStats, category: 'analysis' }, { name: 'get_time_stats', factory: createGetTimeStats, category: 'analysis' }, { name: 'get_member_name_history', factory: createGetMemberNameHistory, category: 'analysis' }, - { name: 'get_conversation_between', factory: createGetConversationBetween, category: 'analysis' }, + { + name: 'get_conversation_between', + factory: createGetConversationBetween, + category: 'analysis', + truncationStrategy: 'keep_last', + }, { name: 'get_session_summaries', factory: createGetSessionSummaries, category: 'analysis' }, { name: 'response_time_analysis', factory: createResponseTimeAnalysis, category: 'analysis' }, { name: 'keyword_frequency', factory: createKeywordFrequency, category: 'analysis' }, diff --git a/electron/main/ai/tools/index.ts b/electron/main/ai/tools/index.ts index 9a419dcd..d488ae92 100644 --- a/electron/main/ai/tools/index.ts +++ b/electron/main/ai/tools/index.ts @@ -6,16 +6,21 @@ */ import type { AgentTool } from '@mariozechner/pi-agent-core' -import type { ToolContext } from './types' +import type { ToolContext, TruncationStrategy } from './types' import { TOOL_REGISTRY } from './definitions' const CORE_TOOL_NAMES = new Set(TOOL_REGISTRY.filter((e) => e.category === 'core').map((e) => e.name)) import { t as i18nT } from '../../i18n' import { preprocessMessages, type PreprocessableMessage } from '../preprocessor' import { formatMessageCompact } from './utils/format' +import { countTokens } from '../tokenizer' import { getSkillConfig } from '../skills' import type { SkillDef } from '../skills/types' +const TRUNCATION_STRATEGY_MAP = new Map( + TOOL_REGISTRY.filter((e) => e.truncationStrategy).map((e) => [e.name, e.truncationStrategy!]) +) + // 导出类型 export * from './types' @@ -128,12 +133,35 @@ function wrapWithPreprocessing(tool: AgentTool, context: ToolContext): Agen nameMapLine = anonymizeMessageNames(processed, context.ownerInfo?.platformId) } - const formatted = processed.map((m) => formatMessageCompact(m, context.locale)) + let formatted = processed.map((m) => formatMessageCompact(m, context.locale)) + + // Token-aware 截断:超出预算时按策略裁剪消息列表 + let wasTruncated = false + const originalCount = formatted.length + if (context.maxToolResultTokens && context.maxToolResultTokens > 0) { + const truncResult = truncateFormattedMessages( + formatted, + context.maxToolResultTokens, + TRUNCATION_STRATEGY_MAP.get(tool.name) ?? 'keep_last' + ) + if (truncResult.wasTruncated) { + formatted = truncResult.messages + wasTruncated = true + } + } const { rawMessages: _rawMessages, ...restDetails } = details - const finalDetails = { ...restDetails, messages: formatted, returned: processed.length } + const finalDetails = { ...restDetails, messages: formatted, returned: formatted.length } let textContent = formatToolResultAsText(finalDetails) + + if (wasTruncated) { + const strategy = TRUNCATION_STRATEGY_MAP.get(tool.name) ?? 'keep_last' + const strategyDesc = strategy === 'keep_first' ? 'most relevant' : 'most recent' + const notice = `⚠️ Results truncated: ${originalCount} messages found, showing ${formatted.length} ${strategyDesc} due to context limit. Use a narrower time range or more specific keywords for more precise results.` + textContent = notice + '\n' + textContent + } + if (nameMapLine) { textContent = nameMapLine + '\n' + textContent } @@ -146,6 +174,51 @@ function wrapWithPreprocessing(tool: AgentTool, context: ToolContext): Agen } } +/** + * Token-aware 截断:在 token 预算内保留尽可能多的消息 + */ +function truncateFormattedMessages( + formatted: string[], + maxTokens: number, + strategy: TruncationStrategy +): { messages: string[]; wasTruncated: boolean } { + // 预留 token 给元数据头部和截断提示 + const budget = maxTokens - 200 + + // 先快速估算总 token,如果未超预算则直接返回 + let totalTokens = 0 + for (const line of formatted) { + totalTokens += countTokens(line) + 1 + } + if (totalTokens <= budget) { + return { messages: formatted, wasTruncated: false } + } + + if (strategy === 'keep_first') { + let tokens = 0 + let cutIndex = formatted.length + for (let i = 0; i < formatted.length; i++) { + tokens += countTokens(formatted[i]) + 1 + if (tokens > budget) { + cutIndex = i + break + } + } + return { messages: formatted.slice(0, cutIndex), wasTruncated: cutIndex < formatted.length } + } else { + let tokens = 0 + let cutIndex = 0 + for (let i = formatted.length - 1; i >= 0; i--) { + tokens += countTokens(formatted[i]) + 1 + if (tokens > budget) { + cutIndex = i + 1 + break + } + } + return { messages: formatted.slice(cutIndex), wasTruncated: cutIndex > 0 } + } +} + /** * 昵称匿名化:用 U{senderId} 替代真实昵称 * 就地修改 messages 的 senderName,返回映射表文本行 diff --git a/electron/main/ai/tools/types.ts b/electron/main/ai/tools/types.ts index 232fdec5..f11c7114 100644 --- a/electron/main/ai/tools/types.ts +++ b/electron/main/ai/tools/types.ts @@ -9,10 +9,14 @@ export type ToolCategory = 'core' | 'analysis' export type ToolFactory = (context: ToolContext) => AgentTool +export type TruncationStrategy = 'keep_first' | 'keep_last' + export interface ToolRegistryEntry { name: string factory: ToolFactory category: ToolCategory + /** 截断策略:keep_first=保留前N条(搜索类), keep_last=保留后N条(时序类) */ + truncationStrategy?: TruncationStrategy } /** Owner 信息(当前用户在对话中的身份) */ @@ -57,4 +61,6 @@ export interface ToolContext { searchContextBefore?: number /** 搜索结果上下文:向后取多少条(默认 3) */ searchContextAfter?: number + /** 单次工具返回的最大 token 数(基于 context window 动态计算) */ + maxToolResultTokens?: number } diff --git a/electron/main/ipc/ai.ts b/electron/main/ipc/ai.ts index 647b6be9..0b5ed274 100644 --- a/electron/main/ipc/ai.ts +++ b/electron/main/ipc/ai.ts @@ -11,6 +11,8 @@ import { getLogsDir } from '../paths' import { Agent, type AgentStreamChunk, type SkillContext } from '../ai/agent' import { getDefaultGeneralAssistantId } from '../ai/assistant/defaultGeneral' import { getActiveConfig, buildPiModel } from '../ai/llm' +import { checkAndCompress, manualCompress, type CompressionConfig } from '../ai/compression' +import { countMessagesTokens } from '../ai/tokenizer' import * as assistantManager from '../ai/assistant' import type { AssistantConfig } from '../ai/assistant/types' import * as skillManager from '../ai/skills' @@ -312,14 +314,23 @@ export function registerAIHandlers({ win }: IpcContext): void { async ( _, conversationId: string, - role: 'user' | 'assistant', + role: aiConversations.AIMessageRole, content: string, dataKeywords?: string[], dataMessageCount?: number, - contentBlocks?: aiConversations.ContentBlock[] + contentBlocks?: aiConversations.ContentBlock[], + tokenUsage?: aiConversations.TokenUsageData ) => { try { - return aiConversations.addMessage(conversationId, role, content, dataKeywords, dataMessageCount, contentBlocks) + return aiConversations.addMessage( + conversationId, + role, + content, + dataKeywords, + dataMessageCount, + contentBlocks, + tokenUsage + ) } catch (error) { console.error('Failed to add AI message:', error) throw error @@ -339,6 +350,18 @@ export function registerAIHandlers({ win }: IpcContext): void { } }) + /** + * 获取对话的累计 token 使用量 + */ + ipcMain.handle('ai:getConversationTokenUsage', async (_, conversationId: string) => { + try { + return aiConversations.getConversationTokenUsage(conversationId) + } catch (error) { + console.error('Failed to get conversation token usage:', error) + return { promptTokens: 0, completionTokens: 0, totalTokens: 0 } + } + }) + /** * 删除 AI 消息 */ @@ -1026,7 +1049,6 @@ export function registerAIHandlers({ win }: IpcContext): void { * Agent 通过 context.conversationId 从 SQLite 读取对话历史(数据流倒置) * @param chatType 聊天类型('group' | 'private') * @param locale 语言设置(可选,默认 'zh-CN') - * @param maxHistoryRounds 前端用户配置的最大历史轮数(可选,每轮 = user + assistant = 2 条) * @param assistantId 助手 ID(可选,传入时从 AssistantManager 获取配置) */ ipcMain.handle( @@ -1038,10 +1060,10 @@ export function registerAIHandlers({ win }: IpcContext): void { context: ToolContext, chatType?: 'group' | 'private', locale?: string, - maxHistoryRounds?: number, assistantId?: string, skillId?: string | null, - enableAutoSkill?: boolean + enableAutoSkill?: boolean, + compressionConfig?: CompressionConfig ) => { aiLogger.info('IPC', `Agent stream request received: ${requestId}`, { userMessage: userMessage.slice(0, 100), @@ -1063,14 +1085,61 @@ export function registerAIHandlers({ win }: IpcContext): void { } const piModel = buildPiModel(activeAIConfig) - const contextHistoryLimit = maxHistoryRounds ? maxHistoryRounds * 2 : undefined + // 上下文压缩前置步骤(在 Agent 创建之前执行) + if (compressionConfig?.enabled && context.conversationId) { + try { + win.webContents.send('agent:streamChunk', { + requestId, + chunk: { + type: 'status', + status: { + phase: 'preparing', + round: 0, + toolsUsed: 0, + contextTokens: 0, + totalUsage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 }, + updatedAt: Date.now(), + } satisfies import('@electron/shared/types').AgentRuntimeStatus, + }, + }) + + // 获取助手 systemPrompt 用于 token 计算 + const tempAssistantConfig = assistantId + ? (assistantManager.getAssistantConfig(assistantId) ?? undefined) + : undefined + const systemPromptForCompression = tempAssistantConfig?.systemPrompt || '' + + const compressionResult = await checkAndCompress( + context.conversationId, + compressionConfig, + systemPromptForCompression, + activeAIConfig + ) + + aiLogger.info('IPC', `Compression result for ${requestId}`, compressionResult) + + if (compressionResult.compressed) { + win.webContents.send('agent:streamChunk', { + requestId, + chunk: { + type: 'status', + status: 'compressed', + content: `Context compressed: ${compressionResult.tokensBefore} → ${compressionResult.tokensAfter} tokens`, + }, + }) + } + } catch (error) { + aiLogger.error('IPC', `Compression failed for ${requestId}, continuing without compression`, { + error: String(error), + }) + } + } const pp = context.preprocessConfig aiLogger.info('IPC', `Agent context: ${requestId}`, { model: activeAIConfig.model, provider: activeAIConfig.provider, baseUrl: activeAIConfig.baseUrl || '(default)', - maxHistoryRounds: maxHistoryRounds ?? '(default)', maxMessagesLimit: context.maxMessagesLimit, hasTimeFilter: !!context.timeFilter, mentionedMembersCount: context.mentionedMembers?.length ?? 0, @@ -1117,11 +1186,18 @@ export function registerAIHandlers({ win }: IpcContext): void { } } + // 工具结果 token 预算注入:基于 context window 百分比计算 + const maxToolResultPercent = compressionConfig?.maxToolResultPercent ?? 50 + const modelDef = llm.findModelDefinition(activeAIConfig.provider, activeAIConfig.model || '') + const resolvedContextWindow = modelDef?.contextWindow || 128000 + const maxToolResultTokens = Math.floor(resolvedContextWindow * (maxToolResultPercent / 100)) + const enrichedContext: ToolContext = { ...context, maxToolResultTokens } + const agent = new Agent( - context, + enrichedContext, piModel, activeAIConfig.apiKey, - { abortSignal: abortController.signal, contextHistoryLimit }, + { abortSignal: abortController.signal }, chatType ?? 'group', locale ?? 'zh-CN', assistantConfig, @@ -1237,6 +1313,36 @@ export function registerAIHandlers({ win }: IpcContext): void { } }) + // ==================== 上下文压缩 ==================== + + ipcMain.handle( + 'ai:compressContext', + async (_, conversationId: string, compressionConfig: CompressionConfig, systemPrompt: string) => { + try { + const activeAIConfig = getActiveConfig() + if (!activeAIConfig) { + return { success: false, error: t('llm.notConfigured') } + } + + const result = await manualCompress(conversationId, compressionConfig, systemPrompt, activeAIConfig) + return { success: true, result } + } catch (error) { + aiLogger.error('IPC', 'Manual compression failed', { error: String(error) }) + return { success: false, error: String(error) } + } + } + ) + + ipcMain.handle('ai:estimateContextTokens', async (_, conversationId: string) => { + try { + const history = aiConversations.getHistoryForAgent(conversationId) + const tokens = countMessagesTokens(history.map((m) => ({ role: m.role, content: m.content }))) + return { success: true, tokens, messageCount: history.length } + } catch (error) { + return { success: false, tokens: 0, error: String(error) } + } + }) + // ==================== Embedding 多配置管理 ==================== /** diff --git a/electron/preload/apis/ai.ts b/electron/preload/apis/ai.ts index 546b7e53..15ba212a 100644 --- a/electron/preload/apis/ai.ts +++ b/electron/preload/apis/ai.ts @@ -41,15 +41,24 @@ export type ContentBlock = } | { type: 'skill'; skillId: string; skillName: string } +export type AIMessageRole = 'user' | 'assistant' | 'summary' + +export interface TokenUsageData { + promptTokens: number + completionTokens: number + totalTokens: number +} + export interface AIMessage { id: string conversationId: string - role: 'user' | 'assistant' + role: AIMessageRole content: string timestamp: number dataKeywords?: string[] dataMessageCount?: number contentBlocks?: ContentBlock[] + tokenUsage?: TokenUsageData } // LLM API 类型 @@ -479,11 +488,12 @@ export const aiApi = { */ addMessage: ( conversationId: string, - role: 'user' | 'assistant', + role: AIMessageRole, content: string, dataKeywords?: string[], dataMessageCount?: number, - contentBlocks?: ContentBlock[] + contentBlocks?: ContentBlock[], + tokenUsage?: TokenUsageData ): Promise => { return ipcRenderer.invoke( 'ai:addMessage', @@ -492,7 +502,8 @@ export const aiApi = { content, dataKeywords, dataMessageCount, - contentBlocks + contentBlocks, + tokenUsage ) }, @@ -503,6 +514,13 @@ export const aiApi = { return ipcRenderer.invoke('ai:getMessages', conversationId) }, + /** + * 获取对话的累计 token 使用量 + */ + getConversationTokenUsage: (conversationId: string): Promise => { + return ipcRenderer.invoke('ai:getConversationTokenUsage', conversationId) + }, + /** * 删除 AI 消息 */ @@ -541,6 +559,36 @@ export const aiApi = { cancelToolTest: (testId: string): Promise<{ success: boolean }> => { return ipcRenderer.invoke('ai:cancelToolTest', testId) }, + + estimateContextTokens: ( + conversationId: string + ): Promise<{ success: boolean; tokens: number; messageCount?: number; error?: string }> => { + return ipcRenderer.invoke('ai:estimateContextTokens', conversationId) + }, + + compressContext: ( + conversationId: string, + compressionConfig: { + enabled: boolean + tokenThresholdPercent: number + bufferSizePercent: number + compressionModelConfigId?: string + maxToolResultPercent?: number + }, + systemPrompt: string + ): Promise<{ + success: boolean + result?: { + compressed: boolean + reason: string + tokensBefore?: number + tokensAfter?: number + error?: string + } + error?: string + }> => { + return ipcRenderer.invoke('ai:compressContext', conversationId, compressionConfig, systemPrompt) + }, } // ==================== LLM API ==================== @@ -913,7 +961,6 @@ export const agentApi = { * Agent 通过 context.conversationId 从后端 SQLite 读取对话历史 * @param chatType 聊天类型('group' | 'private') * @param locale 语言设置(可选,默认 'zh-CN') - * @param maxHistoryRounds 最大历史轮数(可选,每轮 = user + assistant = 2 条) * @returns 返回 { requestId, promise },requestId 可用于中止请求 */ runStream: ( @@ -922,10 +969,16 @@ export const agentApi = { onChunk?: (chunk: AgentStreamChunk) => void, chatType?: 'group' | 'private', locale?: string, - maxHistoryRounds?: number, assistantId?: string, skillId?: string | null, - enableAutoSkill?: boolean + enableAutoSkill?: boolean, + compressionConfig?: { + enabled: boolean + tokenThresholdPercent: number + bufferSizePercent: number + compressionModelConfigId?: string + maxToolResultPercent?: number + } ): { requestId: string promise: Promise<{ success: boolean; result?: AgentResult; error?: SerializedErrorInfo }> @@ -1013,10 +1066,10 @@ export const agentApi = { sanitizedContext, chatType, locale, - maxHistoryRounds, assistantId, skillId, - enableAutoSkill + enableAutoSkill, + compressionConfig ) .then((result) => { console.log('[preload] Agent invoke 返回:', result) diff --git a/electron/preload/index.d.ts b/electron/preload/index.d.ts index 6a6d4088..e6a37fa1 100644 --- a/electron/preload/index.d.ts +++ b/electron/preload/index.d.ts @@ -305,15 +305,24 @@ type AIContentBlock = } | { type: 'skill'; skillId: string; skillName: string } +type AIMessageRole = 'user' | 'assistant' | 'summary' + +interface AITokenUsageData { + promptTokens: number + completionTokens: number + totalTokens: number +} + interface AIMessage { id: string conversationId: string - role: 'user' | 'assistant' + role: AIMessageRole content: string timestamp: number dataKeywords?: string[] dataMessageCount?: number contentBlocks?: AIContentBlock[] + tokenUsage?: AITokenUsageData } interface AiApi { @@ -374,10 +383,12 @@ interface AiApi { content: string, dataKeywords?: string[], dataMessageCount?: number, - contentBlocks?: AIContentBlock[] + contentBlocks?: AIContentBlock[], + tokenUsage?: AITokenUsageData ) => Promise getMessages: (conversationId: string) => Promise getMessages: (conversationId: string) => Promise + getConversationTokenUsage: (conversationId: string) => Promise deleteMessage: (messageId: string) => Promise showAiLogFile: () => Promise<{ success: boolean; path?: string; error?: string }> getDefaultDesensitizeRules: (locale: string) => Promise @@ -390,6 +401,30 @@ interface AiApi { sessionId: string ) => Promise cancelToolTest: (testId: string) => Promise<{ success: boolean }> + estimateContextTokens: ( + conversationId: string + ) => Promise<{ success: boolean; tokens: number; messageCount?: number; error?: string }> + compressContext: ( + conversationId: string, + compressionConfig: { + enabled: boolean + tokenThresholdPercent: number + bufferSizePercent: number + compressionModelConfigId?: string + maxToolResultPercent?: number + }, + systemPrompt: string + ) => Promise<{ + success: boolean + result?: { + compressed: boolean + reason: string + tokensBefore?: number + tokensAfter?: number + error?: string + } + error?: string + }> // 自定义筛选(支持分页) filterMessagesWithContext: ( sessionId: string, @@ -775,10 +810,16 @@ interface AgentApi { onChunk?: (chunk: AgentStreamChunk) => void, chatType?: 'group' | 'private', locale?: string, - maxHistoryRounds?: number, assistantId?: string, skillId?: string | null, - enableAutoSkill?: boolean + enableAutoSkill?: boolean, + compressionConfig?: { + enabled: boolean + tokenThresholdPercent: number + bufferSizePercent: number + compressionModelConfigId?: string + maxToolResultPercent?: number + } ) => { requestId: string; promise: Promise<{ success: boolean; result?: AgentResult; error?: SerializedErrorInfo }> } abort: (requestId: string) => Promise<{ success: boolean; error?: string }> } diff --git a/package.json b/package.json index f8616a41..e10b705c 100644 --- a/package.json +++ b/package.json @@ -53,6 +53,7 @@ "fastify": "^5.8.4", "gray-matter": "^4.0.3", "i18next": "^25.8.5", + "js-tiktoken": "^1.0.21", "markdown-it": "^14.1.0", "stream-json": "^1.9.1", "vue-i18n": "^11.2.8" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9680535a..d2627ff1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -53,6 +53,9 @@ importers: i18next: specifier: ^25.8.5 version: 25.8.5(typescript@5.9.3) + js-tiktoken: + specifier: ^1.0.21 + version: 1.0.21 markdown-it: specifier: ^14.1.0 version: 14.1.0 @@ -3235,6 +3238,9 @@ packages: resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==} hasBin: true + js-tiktoken@1.0.21: + resolution: {integrity: sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==} + js-tokens@4.0.0: resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} @@ -8913,6 +8919,10 @@ snapshots: jiti@2.6.1: {} + js-tiktoken@1.0.21: + dependencies: + base64-js: 1.5.1 + js-tokens@4.0.0: {} js-tokens@9.0.1: {} diff --git a/src/components/AIChat/ChatExplorer.vue b/src/components/AIChat/ChatExplorer.vue index d6bf8d51..1c47cc99 100644 --- a/src/components/AIChat/ChatExplorer.vue +++ b/src/components/AIChat/ChatExplorer.vue @@ -91,6 +91,28 @@ const { // Store const promptStore = usePromptStore() +// 使用后端 tokenizer 精确计算的 context tokens +const estimatedContextTokens = ref(0) + +watch( + () => currentConversationId.value, + async (convId) => { + if (!convId) { + estimatedContextTokens.value = 0 + return + } + try { + const result = await window.aiApi.estimateContextTokens(convId) + if (result.success) { + estimatedContextTokens.value = result.tokens + } + } catch { + estimatedContextTokens.value = 0 + } + }, + { immediate: true } +) + // 当前选中助手的预设问题 const currentPresetQuestions = computed(() => { return assistantStore.selectedAssistant?.presetQuestions ?? [] @@ -461,6 +483,7 @@ watch( :session-token-usage="sessionTokenUsage" :agent-status="agentStatus" :current-conversation-id="currentConversationId" + :estimated-context-tokens="estimatedContextTokens" /> diff --git a/src/components/AIChat/chat/ChatMessage.vue b/src/components/AIChat/chat/ChatMessage.vue index eac60692..e505e2a2 100644 --- a/src/components/AIChat/chat/ChatMessage.vue +++ b/src/components/AIChat/chat/ChatMessage.vue @@ -13,7 +13,7 @@ const toast = useToast() // Props const props = defineProps<{ - role: 'user' | 'assistant' + role: 'user' | 'assistant' | 'summary' content: string timestamp: number isStreaming?: boolean @@ -30,6 +30,7 @@ const formattedTime = computed(() => { // 是否是用户消息 const isUser = computed(() => props.role === 'user') +const isSummary = computed(() => props.role === 'summary') // 创建 markdown-it 实例 const md = new MarkdownIt({ @@ -301,11 +302,37 @@ async function handleCopyMarkdown() {