diff --git a/electron/main/ai/agent/event-handler.ts b/electron/main/ai/agent/event-handler.ts
index 88ca8bd5..8576f388 100644
--- a/electron/main/ai/agent/event-handler.ts
+++ b/electron/main/ai/agent/event-handler.ts
@@ -210,6 +210,7 @@ export class AgentEventHandler {
   private estimateContextTokens(systemPrompt: string, messages: PiMessage[], pendingUserMessage?: string): number {
     let tokens = this.estimateTokensFromText(systemPrompt)
     for (const message of messages) {
+      if (message.role === 'toolResult') continue
       tokens += this.estimateTokensFromText(this.extractMessageText(message))
     }
     if (pendingUserMessage) {
diff --git a/electron/main/ai/agent/index.ts b/electron/main/ai/agent/index.ts
index ec1e9b02..4e2745a0 100644
--- a/electron/main/ai/agent/index.ts
+++ b/electron/main/ai/agent/index.ts
@@ -23,7 +23,7 @@ import { buildSystemPrompt } from './prompt-builder'
 import { extractThinkingContent, stripToolCallTags } from './content-parser'
 import { AgentEventHandler } from './event-handler'
 
-type SimpleHistoryMessage = { role: 'user' | 'assistant'; content: string }
+type SimpleHistoryMessage = { role: 'user' | 'assistant' | 'summary'; content: string }
 
 // Re-export types for external consumers
 export type { AgentConfig, AgentStreamChunk, AgentResult, TokenUsage, AgentRuntimeStatus, SkillContext } from './types'
@@ -63,7 +63,6 @@ export class Agent {
     this.locale = locale
     this.config = {
       maxToolRounds: config.maxToolRounds ?? 5,
-      contextHistoryLimit: config.contextHistoryLimit ?? 48,
     }
   }
 
@@ -175,8 +174,7 @@ export class Agent {
 
     coreAgent.setTools(maxToolRounds > 0 ? piTools : [])
 
-    const limit = this.config.contextHistoryLimit ?? 48
-    const historyMessages = this.loadHistory(limit)
+    const historyMessages = this.loadHistory()
     coreAgent.replaceMessages(this.toPiHistoryMessages(historyMessages))
 
     handler.emitStatus('preparing', coreAgent.state.messages, {
@@ -296,13 +294,13 @@ export class Agent {
    * 从 SQLite 加载对话历史
    * 当 context.conversationId 存在时从 DB 读取，否则返回空数组
    */
-  private loadHistory(limit: number): SimpleHistoryMessage[] {
+  private loadHistory(): SimpleHistoryMessage[] {
     const { conversationId } = this.context
     if (!conversationId) {
       return []
     }
     try {
-      return getHistoryForAgent(conversationId, limit > 0 ? limit : undefined)
+      return getHistoryForAgent(conversationId)
     } catch (error) {
       aiLogger.warn('Agent', 'Failed to load history from DB, using empty history', { conversationId, error })
       return []
@@ -330,6 +328,7 @@ export class Agent {
         }
       }
 
+      // summary 作为 assistant 消息传给 LLM（它是压缩后的上下文摘要）
       return {
         role: 'assistant',
         content: [{ type: 'text', text: msg.content || '' }],
diff --git a/electron/main/ai/agent/types.ts b/electron/main/ai/agent/types.ts
index 0147dd81..effa5c6f 100644
--- a/electron/main/ai/agent/types.ts
+++ b/electron/main/ai/agent/types.ts
@@ -12,8 +12,6 @@ export type { TokenUsage, AgentRuntimeStatus, SerializedErrorInfo } from '../../
 export interface AgentConfig {
   /** 最大工具调用轮数（防止无限循环） */
   maxToolRounds?: number
-  /** 注入模型的历史消息上限（user+assistant） */
-  contextHistoryLimit?: number
   /** 中止信号，用于取消执行 */
   abortSignal?: AbortSignal
 }
diff --git a/electron/main/ai/compression/index.ts b/electron/main/ai/compression/index.ts
new file mode 100644
index 00000000..b3036d4f
--- /dev/null
+++ b/electron/main/ai/compression/index.ts
@@ -0,0 +1,312 @@
+/**
+ * 上下文压缩服务
+ * 在 Agent 推理前同步执行，将过长的对话历史压缩为摘要。
+ *
+ * 核心流程：
+ *   1. 计算当前上下文总 token → 未超阈值则跳过
+ *   2. 确定缓冲区：最近 bufferSizePercent% context window 的消息原文
+ *   3. 缓冲区之前的消息（含旧 summary）→ LLM 压缩为新摘要
+ *   4. 写入 ai_message(role='summary')，替换旧 summary
+ *   5. Thrashing 检查
+ */
+
+import { countTokens, countMessagesTokens } from '../tokenizer'
+import {
+  getLatestSummary,
+  getMessagesAfterSummary,
+  getAllUserAssistantMessages,
+  addSummaryMessage,
+  getMessageCountAfterSummary,
+} from '../conversations'
+import { buildPiModel, getActiveConfig, findModelDefinition } from '../llm'
+import type { AIServiceConfig } from '../llm/types'
+import { completeSimple, type TextContent as PiTextContent } from '@mariozechner/pi-ai'
+import { aiLogger } from '../logger'
+
+// ==================== 类型定义 ====================
+
+export interface CompressionConfig {
+  enabled: boolean
+  /** Token threshold that triggers compression, as a percentage of the context window (documented default: 75) */
+  tokenThresholdPercent: number
+  /** Size of the buffer of recent messages kept verbatim, as a percentage of the context window (documented default: 20) */
+  bufferSizePercent: number
+  /** Id of a dedicated compression model config (when empty, the current conversation model is used) */
+  compressionModelConfigId?: string
+  /** Maximum share of the context window a single tool result may occupy, in percent (documented default: 35) */
+  maxToolResultPercent?: number
+}
+
+export interface CompressionResult {
+  compressed: boolean
+  reason:
+    | 'skipped_disabled'
+    | 'skipped_below_threshold'
+    | 'skipped_idempotent'
+    | 'success'
+    | 'fallback_truncated'
+    | 'thrashing'
+    | 'error'
+  tokensBefore?: number
+  tokensAfter?: number
+  error?: string
+}
+
+const DEFAULT_COMPRESSION_PROMPT = `Please compress the following conversation history into a concise summary, preserving key information, decisions, and context.
+Requirements:
+- Preserve key facts, data, names, and conclusions
+- Preserve user preferences and important instructions
+- Preserve time points and important events
+- Output in the same language as the conversation
+- Keep it within {maxTokens} tokens
+
+Conversation history:
+{messages}`
+
+const DEFAULT_CONTEXT_WINDOW = 128000
+
+// ==================== 核心压缩逻辑 ====================
+
+/**
+ * Check the conversation's context size and, when it reaches the threshold, compress older history into a summary.
+ * Intended to be awaited before agent inference. Errors are caught and reported as `reason: 'error'`.
+ *
+ * A threshold of 0 (as passed by `manualCompress`) forces compression and disables the thrashing check, and
+ * `reason` is `'fallback_truncated'` when no model produced a summary and the input was truncated instead.
+ */
+export async function checkAndCompress(
+  conversationId: string,
+  config: CompressionConfig,
+  systemPrompt: string,
+  activeAIConfig: AIServiceConfig
+): Promise<CompressionResult> {
+  if (!config.enabled) {
+    return { compressed: false, reason: 'skipped_disabled' }
+  }
+
+  try {
+    const contextWindow = resolveContextWindow(config, activeAIConfig)
+    const thresholdTokens = Math.floor(contextWindow * (config.tokenThresholdPercent / 100) * 0.95)
+
+    // Collect the current context: the latest summary (if any) plus the messages that follow it.
+    const summary = getLatestSummary(conversationId)
+    const messages = summary
+      ? getMessagesAfterSummary(conversationId, summary.timestamp)
+      : getAllUserAssistantMessages(conversationId)
+
+    // Build the list used for token counting; the summary is counted as an assistant message.
+    const historyForTokenCount: Array<{ role: string; content: string }> = []
+    if (summary) {
+      historyForTokenCount.push({ role: 'assistant', content: summary.content })
+    }
+    for (const msg of messages) {
+      historyForTokenCount.push({ role: msg.role, content: msg.content })
+    }
+
+    const currentTokens = countMessagesTokens(historyForTokenCount, systemPrompt)
+
+    aiLogger.info('Compression', `Token check: ${currentTokens} / ${thresholdTokens} (${contextWindow} window)`, {
+      conversationId,
+      messageCount: messages.length,
+      hasSummary: !!summary,
+    })
+
+    if (currentTokens < thresholdTokens) {
+      return { compressed: false, reason: 'skipped_below_threshold', tokensBefore: currentTokens }
+    }
+
+    // Split off the most recent messages (up to bufferSizePercent% of the window); only older ones are summarized.
+    const bufferTokenBudget = Math.floor(contextWindow * (config.bufferSizePercent / 100))
+    const { messagesToCompress } = splitMessagesForCompression(messages, summary, bufferTokenBudget)
+
+    if (messagesToCompress.length === 0) {
+      return { compressed: false, reason: 'skipped_below_threshold', tokensBefore: currentTokens }
+    }
+
+    // Build the text handed to the summarizer; the summary is asked to fit in 10% of the window.
+    const compressInput = buildCompressionInput(messagesToCompress, summary)
+    const targetTokens = Math.floor(contextWindow * 0.1)
+
+    // Three-tier fallback: dedicated compression model → active chat model → forced truncation.
+    let summaryText: string | null = null
+    let usedFallback = false
+    if (config.compressionModelConfigId) {
+      summaryText = await tryCompress(config.compressionModelConfigId, compressInput, targetTokens)
+    }
+    if (!summaryText) {
+      summaryText = await tryCompressWithConfig(activeAIConfig, compressInput, targetTokens)
+    }
+    if (!summaryText) {
+      aiLogger.warn('Compression', 'LLM compression failed, falling back to truncation')
+      summaryText = forceTruncate(compressInput, targetTokens)
+      usedFallback = true
+    }
+
+    // Persist the new summary (replaces any previous summary row).
+    addSummaryMessage(conversationId, summaryText)
+
+    // Thrashing check. NOTE(review): the summary row is stamped "now", so the kept buffer sorts before it — confirm history loading still includes it.
+    const afterMessages = getMessagesAfterSummary(conversationId, Date.now() / 1000 - 1)
+    const afterTokenCount: Array<{ role: string; content: string }> = [
+      { role: 'assistant', content: summaryText },
+      ...afterMessages.map((m) => ({ role: m.role, content: m.content })),
+    ]
+    const tokensAfter = countMessagesTokens(afterTokenCount, systemPrompt)
+
+    if (thresholdTokens > 0 && tokensAfter >= thresholdTokens) {
+      aiLogger.warn(
+        'Compression',
+        `Thrashing detected: ${tokensAfter} tokens after compression still >= ${thresholdTokens}`
+      )
+      return { compressed: true, reason: 'thrashing', tokensBefore: currentTokens, tokensAfter }
+    }
+
+    aiLogger.info('Compression', `Compressed: ${currentTokens} → ${tokensAfter} tokens`)
+    return { compressed: true, reason: usedFallback ? 'fallback_truncated' : 'success', tokensBefore: currentTokens, tokensAfter }
+  } catch (error) {
+    aiLogger.error('Compression', 'Compression failed', { error: String(error) })
+    return { compressed: false, reason: 'error', error: String(error) }
+  }
+}
+
+/**
+ * User-triggered compression. Reported as `skipped_idempotent` while fewer than five user/assistant messages
+ * follow the latest summary; otherwise runs `checkAndCompress` with compression enabled and a zero threshold.
+ */
+export async function manualCompress(
+  conversationId: string,
+  config: CompressionConfig,
+  systemPrompt: string,
+  activeAIConfig: AIServiceConfig
+): Promise<CompressionResult> {
+  if (getMessageCountAfterSummary(conversationId) < 5) {
+    return { compressed: false, reason: 'skipped_idempotent' }
+  }
+
+  // Force the run: a zero threshold means the size check can never skip it.
+  const forced = { ...config, enabled: true, tokenThresholdPercent: 0 }
+  return checkAndCompress(conversationId, forced, systemPrompt, activeAIConfig)
+}
+
+// ==================== 内部辅助函数 ====================
+
+/** Context window of the active model's definition, or DEFAULT_CONTEXT_WINDOW when none is available. */
+function resolveContextWindow(_config: CompressionConfig, activeAIConfig: AIServiceConfig): number {
+  return findModelDefinition(activeAIConfig.provider, activeAIConfig.model || '')?.contextWindow ?? DEFAULT_CONTEXT_WINDOW
+}
+
+/** Outcome of splitting history into a tail kept verbatim and a prefix to summarize. */
+interface SplitResult {
+  bufferMessages: Array<{ role: string; content: string; timestamp: number }>
+  messagesToCompress: Array<{ role: string; content: string; timestamp: number }>
+}
+
+/**
+ * Partition `messages` (expected oldest first) into a recent tail whose estimated size fits
+ * `bufferTokenBudget` and the older prefix that should be summarized.
+ *
+ * Each message is costed at its token count plus 4. `summary` is accepted but not used here.
+ */
+function splitMessagesForCompression(
+  messages: Array<{ role: string; content: string; timestamp: number }>,
+  summary: { content: string } | null,
+  bufferTokenBudget: number
+): SplitResult {
+  let used = 0
+  let boundary = messages.length
+  // Walk backwards from the newest message, growing the tail until the next message would overflow the budget.
+  while (boundary > 0) {
+    const cost = countTokens(messages[boundary - 1].content) + 4
+    if (used + cost > bufferTokenBudget) break
+    used += cost
+    boundary -= 1
+  }
+
+  const bufferMessages = messages.slice(boundary)
+  const messagesToCompress = messages.slice(0, boundary)
+  return { bufferMessages, messagesToCompress }
+}
+
+/**
+ * Render the text handed to the summarizer.
+ *
+ * The previous summary, when present, comes first under a `[Previous Summary]` heading; every message follows
+ * as `User: …` when its role is `user` and `Assistant: …` otherwise. Sections are joined with a blank line.
+ * Returns an empty string when there is nothing to render.
+ */
+function buildCompressionInput(
+  messagesToCompress: Array<{ role: string; content: string }>,
+  existingSummary: { content: string } | null
+): string {
+  const header = existingSummary ? [`[Previous Summary]\n${existingSummary.content}\n`] : []
+  const turns = messagesToCompress.map(
+    ({ role, content }) => `${role === 'user' ? 'User' : 'Assistant'}: ${content}`
+  )
+  return [...header, ...turns].join('\n\n')
+}
+
+/**
+ * Try compressing with the saved model config `configId`; resolves to null when it is not found or the attempt throws.
+ */
+async function tryCompress(configId: string, input: string, targetTokens: number): Promise<string | null> {
+  try {
+    const { getAllConfigs } = await import('../llm')
+    const match = getAllConfigs().find((candidate) => candidate.id === configId)
+    return match ? await tryCompressWithConfig(match, input, targetTokens) : null
+  } catch (error) {
+    aiLogger.warn('Compression', `Compression with config ${configId} failed`, { error: String(error) })
+    return null
+  }
+}
+
+/**
+ * Ask the model described by `aiConfig` for a summary of `input`; resolves to null on empty output or any error.
+ */
+async function tryCompressWithConfig(
+  aiConfig: AIServiceConfig,
+  input: string,
+  targetTokens: number
+): Promise<string | null> {
+  try {
+    const piModel = buildPiModel(aiConfig)
+    // Function replacer: chat text may contain "$&", "$'" or "$$", which a string replacement would expand.
+    const prompt = DEFAULT_COMPRESSION_PROMPT.replace('{maxTokens}', String(targetTokens)).replace('{messages}', () => input)
+
+    const result = await completeSimple(
+      piModel,
+      {
+        systemPrompt: undefined,
+        messages: [
+          {
+            role: 'user',
+            content: [{ type: 'text', text: prompt }],
+            timestamp: Date.now(),
+          },
+        ] as any,
+      },
+      { apiKey: aiConfig.apiKey, maxTokens: targetTokens }
+    )
+
+    const text = result.content
+      .filter((item): item is PiTextContent => item.type === 'text')
+      .map((item) => item.text)
+      .join('')
+    return text || null
+  } catch (error) {
+    aiLogger.warn('Compression', 'LLM compression attempt failed', { error: String(error) })
+    return null
+  }
+}
+
+/** Last-resort summary: whole leading lines of `input` within `targetTokens`, else its first targetTokens * 3 characters. */
+function forceTruncate(input: string, targetTokens: number): string {
+  const kept: string[] = []
+  let budgetLeft = targetTokens
+  for (const line of input.split('\n')) {
+    const cost = countTokens(line)
+    if (cost > budgetLeft) break
+    budgetLeft -= cost
+    kept.push(line)
+  }
+  return kept.join('\n') || input.slice(0, targetTokens * 3)
+}
diff --git a/electron/main/ai/conversations.ts b/electron/main/ai/conversations.ts
index 49073bd9..86067b91 100644
--- a/electron/main/ai/conversations.ts
+++ b/electron/main/ai/conversations.ts
@@ -87,6 +87,12 @@ function migrateAiDatabase(db: Database.Database): void {
       console.log('[AI DB Migration] Adding content_blocks column')
     }
 
+    // 检查并添加 token_usage 列（JSON: {promptTokens, completionTokens, totalTokens}）
+    if (!messageColumns.includes('token_usage')) {
+      db.exec('ALTER TABLE ai_message ADD COLUMN token_usage TEXT')
+      console.log('[AI DB Migration] Adding token_usage column to ai_message')
+    }
+
     // 获取 ai_conversation 表的列信息
     const convTableInfo = db.pragma('table_info(ai_conversation)') as Array<{ name: string }>
     const convColumns = convTableInfo.map((col) => col.name)
@@ -145,16 +151,26 @@ export type ContentBlock =
 /**
  * AI 消息类型
  */
+export type AIMessageRole = 'user' | 'assistant' | 'summary'
+
+export interface TokenUsageData {
+  promptTokens: number
+  completionTokens: number
+  totalTokens: number
+}
+
 export interface AIMessage {
   id: string
   conversationId: string
-  role: 'user' | 'assistant'
+  role: AIMessageRole
   content: string
   timestamp: number
   dataKeywords?: string[]
   dataMessageCount?: number
   /** AI 消息的内容块数组（按时序排列的文本和工具调用） */
   contentBlocks?: ContentBlock[]
+  /** 本次 Agent 执行的 token 使用量（仅 assistant 消息） */
+  tokenUsage?: TokenUsageData
 }
 
 // ==================== 对话管理 ====================
@@ -283,11 +299,12 @@ export function deleteConversation(conversationId: string): boolean {
  */
 export function addMessage(
   conversationId: string,
-  role: 'user' | 'assistant',
+  role: AIMessageRole,
   content: string,
   dataKeywords?: string[],
   dataMessageCount?: number,
-  contentBlocks?: ContentBlock[]
+  contentBlocks?: ContentBlock[],
+  tokenUsage?: TokenUsageData
 ): AIMessage {
   const db = getAiDb()
   const now = Math.floor(Date.now() / 1000)
@@ -295,8 +312,8 @@ export function addMessage(
 
   db.prepare(
     `
-    INSERT INTO ai_message (id, conversation_id, role, content, timestamp, data_keywords, data_message_count, content_blocks)
-    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+    INSERT INTO ai_message (id, conversation_id, role, content, timestamp, data_keywords, data_message_count, content_blocks, token_usage)
+    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
   `
   ).run(
     id,
@@ -306,7 +323,8 @@ export function addMessage(
     now,
     dataKeywords ? JSON.stringify(dataKeywords) : null,
     dataMessageCount ?? null,
-    contentBlocks ? JSON.stringify(contentBlocks) : null
+    contentBlocks ? JSON.stringify(contentBlocks) : null,
+    tokenUsage ? JSON.stringify(tokenUsage) : null
   )
 
   // 更新对话的 updated_at
@@ -321,6 +339,7 @@ export function addMessage(
     dataKeywords,
     dataMessageCount,
     contentBlocks,
+    tokenUsage,
   }
 }
 
@@ -341,7 +360,8 @@ export function getMessages(conversationId: string): AIMessage[] {
       timestamp,
       data_keywords as dataKeywords,
       data_message_count as dataMessageCount,
-      content_blocks as contentBlocks
+      content_blocks as contentBlocks,
+      token_usage as tokenUsage
     FROM ai_message
     WHERE conversation_id = ?
     ORDER BY timestamp ASC
@@ -356,17 +376,19 @@ export function getMessages(conversationId: string): AIMessage[] {
     dataKeywords: string | null
     dataMessageCount: number | null
     contentBlocks: string | null
+    tokenUsage: string | null
   }>
 
   return rows.map((row) => ({
     id: row.id,
     conversationId: row.conversationId,
-    role: row.role as 'user' | 'assistant',
+    role: row.role as AIMessageRole,
     content: row.content,
     timestamp: row.timestamp,
     dataKeywords: row.dataKeywords ? JSON.parse(row.dataKeywords) : undefined,
     dataMessageCount: row.dataMessageCount ?? undefined,
     contentBlocks: row.contentBlocks ? JSON.parse(row.contentBlocks) : undefined,
+    tokenUsage: row.tokenUsage ? JSON.parse(row.tokenUsage) : undefined,
   }))
 }
 
@@ -379,26 +401,206 @@ export function deleteMessage(messageId: string): boolean {
   return result.changes > 0
 }
 
+/**
+ * Total token usage recorded for a conversation.
+ *
+ * Sums the `promptTokens`, `completionTokens` and `totalTokens` fields of every message row that has a
+ * `token_usage` value; each total is 0 when no row has one.
+ */
+export function getConversationTokenUsage(conversationId: string): TokenUsageData {
+  const sql = `
+    SELECT
+      COALESCE(SUM(json_extract(token_usage, '$.promptTokens')), 0) as promptTokens,
+      COALESCE(SUM(json_extract(token_usage, '$.completionTokens')), 0) as completionTokens,
+      COALESCE(SUM(json_extract(token_usage, '$.totalTokens')), 0) as totalTokens
+    FROM ai_message
+    WHERE conversation_id = ? AND token_usage IS NOT NULL
+  `
+
+  const totals = getAiDb().prepare(sql).get(conversationId) as TokenUsageData
+
+  return {
+    promptTokens: totals.promptTokens,
+    completionTokens: totals.completionTokens,
+    totalTokens: totals.totalTokens,
+  }
+}
+
 // ==================== Agent 专用 ====================
 
 /**
  * 为 Agent 提供对话历史
  *
  * 返回简化的 {role, content} 格式，按时间升序排列。
+ * 当存在 summary 消息时，返回最新 summary + summary 之后的 user/assistant 消息，
+ * 以避免重复加载已被压缩的旧消息。
+ *
  * @param conversationId 对话 ID
- * @param maxMessages 最大返回条数（取最近 N 条）
+ * @param maxMessages 最大返回条数（取最近 N 条，仅对 summary 之后的消息生效）
  */
 export function getHistoryForAgent(
   conversationId: string,
   maxMessages?: number
-): Array<{ role: 'user' | 'assistant'; content: string }> {
+): Array<{ role: 'user' | 'assistant' | 'summary'; content: string }> {
   const messages = getMessages(conversationId)
-  const filtered = messages
-    .filter((m) => (m.role === 'user' || m.role === 'assistant') && m.content?.trim())
-    .map((m) => ({ role: m.role, content: m.content }))
+  const validMessages = messages.filter(
+    (m) => (m.role === 'user' || m.role === 'assistant' || m.role === 'summary') && m.content?.trim()
+  )
 
-  if (maxMessages && filtered.length > maxMessages) {
-    return filtered.slice(-maxMessages)
+  // 查找最新的 summary 消息位置
+  let summaryIndex = -1
+  for (let i = validMessages.length - 1; i >= 0; i--) {
+    if (validMessages[i].role === 'summary') {
+      summaryIndex = i
+      break
+    }
   }
-  return filtered
+
+  let result: Array<{ role: 'user' | 'assistant' | 'summary'; content: string }>
+
+  if (summaryIndex >= 0) {
+    // 返回 summary + summary 之后的消息
+    result = validMessages.slice(summaryIndex).map((m) => ({ role: m.role, content: m.content }))
+  } else {
+    result = validMessages.map((m) => ({ role: m.role, content: m.content }))
+  }
+
+  if (maxMessages && result.length > maxMessages) {
+    // 如果有 summary 且它是第一条，保留它再截取后面的
+    if (result.length > 0 && result[0].role === 'summary') {
+      const rest = result.slice(1)
+      const truncated = rest.slice(-(maxMessages - 1))
+      return [result[0], ...truncated]
+    }
+    return result.slice(-maxMessages)
+  }
+  return result
+}
+
+// ==================== Summary / 压缩专用 ====================
+
+/**
+ * Replace the conversation's summary: delete every existing `summary` row, then insert `content` as the new one.
+ * Both steps run in one transaction so a failed insert cannot leave the conversation without its previous summary.
+ */
+export function addSummaryMessage(conversationId: string, content: string): AIMessage {
+  const db = getAiDb()
+  return db.transaction((): AIMessage => {
+    db.prepare("DELETE FROM ai_message WHERE conversation_id = ? AND role = 'summary'").run(conversationId)
+    return addMessage(conversationId, 'summary', content)
+  })()
+}
+
+/**
+ * Fetch the conversation's most recent `summary` row (highest timestamp), or null when it has none. `token_usage` is not selected.
+ */
+export function getLatestSummary(conversationId: string): AIMessage | null {
+  const db = getAiDb()
+  const row = db
+    .prepare(
+      `
+    SELECT id, conversation_id as conversationId, role, content, timestamp,
+           data_keywords as dataKeywords, data_message_count as dataMessageCount, content_blocks as contentBlocks
+    FROM ai_message
+    WHERE conversation_id = ? AND role = 'summary'
+    ORDER BY timestamp DESC
+    LIMIT 1
+  `
+    )
+    .get(conversationId) as
+    | {
+        id: string
+        conversationId: string
+        role: string
+        content: string
+        timestamp: number
+        dataKeywords: string | null
+        dataMessageCount: number | null
+        contentBlocks: string | null
+      }
+    | undefined
+
+  if (!row) return null
+  return {
+    id: row.id,
+    conversationId: row.conversationId,
+    role: row.role as AIMessageRole,
+    content: row.content,
+    timestamp: row.timestamp,
+    dataKeywords: row.dataKeywords ? JSON.parse(row.dataKeywords) : undefined,
+    dataMessageCount: row.dataMessageCount ?? undefined,
+    contentBlocks: row.contentBlocks ? JSON.parse(row.contentBlocks) : undefined,
+  }
+}
+
+/**
+ * Fetch the user/assistant messages whose timestamp is strictly greater than `summaryTimestamp`, oldest first (used for compression sizing).
+ */
+export function getMessagesAfterSummary(
+  conversationId: string,
+  summaryTimestamp: number
+): Array<{ role: AIMessageRole; content: string; timestamp: number }> {
+  const db = getAiDb()
+  const rows = db
+    .prepare(
+      `
+    SELECT role, content, timestamp
+    FROM ai_message
+    WHERE conversation_id = ? AND timestamp > ? AND role IN ('user', 'assistant')
+    ORDER BY timestamp ASC
+  `
+    )
+    .all(conversationId, summaryTimestamp) as Array<{
+    role: string
+    content: string
+    timestamp: number
+  }>
+
+  return rows.map((r) => ({ role: r.role as AIMessageRole, content: r.content, timestamp: r.timestamp }))
+}
+
+/**
+ * Fetch every user/assistant message of the conversation, oldest first; summary rows are excluded (used for the first compression).
+ */
+export function getAllUserAssistantMessages(
+  conversationId: string
+): Array<{ role: AIMessageRole; content: string; timestamp: number }> {
+  const db = getAiDb()
+  const rows = db
+    .prepare(
+      `
+    SELECT role, content, timestamp
+    FROM ai_message
+    WHERE conversation_id = ? AND role IN ('user', 'assistant')
+    ORDER BY timestamp ASC
+  `
+    )
+    .all(conversationId) as Array<{
+    role: string
+    content: string
+    timestamp: number
+  }>
+
+  return rows.map((r) => ({ role: r.role as AIMessageRole, content: r.content, timestamp: r.timestamp }))
+}
+
+/**
+ * Count the user/assistant messages of a conversation that are newer than its latest summary
+ * (timestamp strictly greater than the summary's), or all of its user/assistant messages
+ * when the conversation has no summary yet.
+ */
+export function getMessageCountAfterSummary(conversationId: string): number {
+  const summary = getLatestSummary(conversationId)
+  const db = getAiDb()
+  type CountRow = { count: number }
+
+  if (summary) {
+    const sinceSummary =
+      "SELECT COUNT(*) as count FROM ai_message WHERE conversation_id = ? AND timestamp > ? AND role IN ('user', 'assistant')"
+    return (db.prepare(sinceSummary).get(conversationId, summary.timestamp) as CountRow).count
+  }
+
+  const overall =
+    "SELECT COUNT(*) as count FROM ai_message WHERE conversation_id = ? AND role IN ('user', 'assistant')"
+  return (db.prepare(overall).get(conversationId) as CountRow).count
 }
diff --git a/electron/main/ai/tokenizer/index.ts b/electron/main/ai/tokenizer/index.ts
new file mode 100644
index 00000000..2f436647
--- /dev/null
+++ b/electron/main/ai/tokenizer/index.ts
@@ -0,0 +1,47 @@
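+/**
+ * Token counting module.
+ * Uses js-tiktoken with the encoding that `encodingForModel('gpt-4o')` selects (o200k_base, not cl100k_base).
+ * The count is only an approximation for other model families (Claude, models from Chinese vendors),
+ * which is why threshold calculations leave some headroom.
+ */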
+
+import { encodingForModel } from 'js-tiktoken'
+
+let encoder: ReturnType<typeof encodingForModel> | null = null
+
+/** Returns the module-wide encoder, creating it on first use. */
+function getEncoder() {
+  if (encoder !== null) return encoder
+  encoder = encodingForModel('gpt-4o')
+  return encoder
+}
+
+/**
+ * Count the tokens of `text` (0 when empty). Special-token markers such as `<|endoftext|>` are encoded as plain text instead of throwing.
+ */
+export function countTokens(text: string): number {
+  if (!text) return 0
+  return getEncoder().encode(text, [], []).length
+}
+
+/**
+ * Estimate the total tokens of a message list, optionally including the system prompt.
+ * Each message (and the system prompt) adds 4 tokens of formatting overhead, plus 3 tokens for reply priming.
+ * Special-token markers in content are counted as plain text, and missing content counts as empty.
+ */
+export function countMessagesTokens(messages: Array<{ role: string; content: string }>, systemPrompt?: string): number {
+  const enc = getEncoder()
+  // Passing [] as disallowedSpecial stops encode() from throwing on text such as "<|endoftext|>".
+  const size = (text: string | null | undefined): number => (text ? enc.encode(text, [], []).length : 0)
+  let total = 0
+  if (systemPrompt) {
+    total += size(systemPrompt) + 4
+  }
+
+  for (const msg of messages) {
+    total += size(msg.content) + 4
+  }
+
+  // Reply-priming tokens.
+  return total + 3
+}
diff --git a/electron/main/ai/tools/definitions/index.ts b/electron/main/ai/tools/definitions/index.ts
index 858e3dcd..c7fe256c 100644
--- a/electron/main/ai/tools/definitions/index.ts
+++ b/electron/main/ai/tools/definitions/index.ts
@@ -30,19 +30,34 @@ export { sqlToolEntries } from './sql-analysis'
 export const TOOL_REGISTRY: ToolRegistryEntry[] = [
   // ==================== Core 工具（始终加载） ====================
   { name: 'get_chat_overview', factory: createGetChatOverview, category: 'core' },
-  { name: 'search_messages', factory: createSearchMessages, category: 'core' },
-  { name: 'deep_search_messages', factory: createDeepSearchMessages, category: 'core' },
-  { name: 'get_recent_messages', factory: createGetRecentMessages, category: 'core' },
-  { name: 'get_message_context', factory: createGetMessageContext, category: 'core' },
+  { name: 'search_messages', factory: createSearchMessages, category: 'core', truncationStrategy: 'keep_first' },
+  {
+    name: 'deep_search_messages',
+    factory: createDeepSearchMessages,
+    category: 'core',
+    truncationStrategy: 'keep_first',
+  },
+  { name: 'get_recent_messages', factory: createGetRecentMessages, category: 'core', truncationStrategy: 'keep_last' },
+  { name: 'get_message_context', factory: createGetMessageContext, category: 'core', truncationStrategy: 'keep_last' },
   { name: 'search_sessions', factory: createSearchSessions, category: 'core' },
-  { name: 'get_session_messages', factory: createGetSessionMessages, category: 'core' },
+  {
+    name: 'get_session_messages',
+    factory: createGetSessionMessages,
+    category: 'core',
+    truncationStrategy: 'keep_last',
+  },
   { name: 'get_members', factory: createGetMembers, category: 'core' },
 
   // ==================== Analysis 工具（按需加载） ====================
   { name: 'get_member_stats', factory: createGetMemberStats, category: 'analysis' },
   { name: 'get_time_stats', factory: createGetTimeStats, category: 'analysis' },
   { name: 'get_member_name_history', factory: createGetMemberNameHistory, category: 'analysis' },
-  { name: 'get_conversation_between', factory: createGetConversationBetween, category: 'analysis' },
+  {
+    name: 'get_conversation_between',
+    factory: createGetConversationBetween,
+    category: 'analysis',
+    truncationStrategy: 'keep_last',
+  },
   { name: 'get_session_summaries', factory: createGetSessionSummaries, category: 'analysis' },
   { name: 'response_time_analysis', factory: createResponseTimeAnalysis, category: 'analysis' },
   { name: 'keyword_frequency', factory: createKeywordFrequency, category: 'analysis' },
diff --git a/electron/main/ai/tools/index.ts b/electron/main/ai/tools/index.ts
index 9a419dcd..d488ae92 100644
--- a/electron/main/ai/tools/index.ts
+++ b/electron/main/ai/tools/index.ts
@@ -6,16 +6,21 @@
  */
 
 import type { AgentTool } from '@mariozechner/pi-agent-core'
-import type { ToolContext } from './types'
+import type { ToolContext, TruncationStrategy } from './types'
 import { TOOL_REGISTRY } from './definitions'
 
 const CORE_TOOL_NAMES = new Set(TOOL_REGISTRY.filter((e) => e.category === 'core').map((e) => e.name))
 import { t as i18nT } from '../../i18n'
 import { preprocessMessages, type PreprocessableMessage } from '../preprocessor'
 import { formatMessageCompact } from './utils/format'
+import { countTokens } from '../tokenizer'
 import { getSkillConfig } from '../skills'
 import type { SkillDef } from '../skills/types'
 
+const TRUNCATION_STRATEGY_MAP = new Map<string, TruncationStrategy>(
+  TOOL_REGISTRY.filter((e) => e.truncationStrategy).map((e) => [e.name, e.truncationStrategy!])
+)
+
 // 导出类型
 export * from './types'
 
@@ -128,12 +133,35 @@ function wrapWithPreprocessing(tool: AgentTool<any>, context: ToolContext): Agen
         nameMapLine = anonymizeMessageNames(processed, context.ownerInfo?.platformId)
       }
 
-      const formatted = processed.map((m) => formatMessageCompact(m, context.locale))
+      let formatted = processed.map((m) => formatMessageCompact(m, context.locale))
+
+      // Token-aware 截断：超出预算时按策略裁剪消息列表
+      let wasTruncated = false
+      const originalCount = formatted.length
+      if (context.maxToolResultTokens && context.maxToolResultTokens > 0) {
+        const truncResult = truncateFormattedMessages(
+          formatted,
+          context.maxToolResultTokens,
+          TRUNCATION_STRATEGY_MAP.get(tool.name) ?? 'keep_last'
+        )
+        if (truncResult.wasTruncated) {
+          formatted = truncResult.messages
+          wasTruncated = true
+        }
+      }
 
       const { rawMessages: _rawMessages, ...restDetails } = details
-      const finalDetails = { ...restDetails, messages: formatted, returned: processed.length }
+      const finalDetails = { ...restDetails, messages: formatted, returned: formatted.length }
 
       let textContent = formatToolResultAsText(finalDetails)
+
+      if (wasTruncated) {
+        const strategy = TRUNCATION_STRATEGY_MAP.get(tool.name) ?? 'keep_last'
+        const strategyDesc = strategy === 'keep_first' ? 'most relevant' : 'most recent'
+        const notice = `⚠️ Results truncated: ${originalCount} messages found, showing ${formatted.length} ${strategyDesc} due to context limit. Use a narrower time range or more specific keywords for more precise results.`
+        textContent = notice + '\n' + textContent
+      }
+
       if (nameMapLine) {
         textContent = nameMapLine + '\n' + textContent
       }
@@ -146,6 +174,51 @@ function wrapWithPreprocessing(tool: AgentTool<any>, context: ToolContext): Agen
   }
 }
 
+/**
+ * Token-aware truncation: keep as many formatted messages as fit into the token budget.
+ *
+ * @param formatted Formatted message lines, in their original order.
+ * @param maxTokens Token limit for the tool result; 200 tokens are reserved for the metadata header and notice.
+ * @param strategy `keep_first` keeps the leading messages, `keep_last` keeps the trailing ones.
+ * @returns The retained messages (a contiguous slice, order preserved) and whether anything was dropped.
+ */
+function truncateFormattedMessages(
+  formatted: string[],
+  maxTokens: number,
+  strategy: TruncationStrategy
+): { messages: string[]; wasTruncated: boolean } {
+  const budget = maxTokens - 200
+  // Tokenize every line once (+1 per line for the separator); the total and the cut search share these costs.
+  const costs = formatted.map((line) => countTokens(line) + 1)
+  const totalTokens = costs.reduce((sum, cost) => sum + cost, 0)
+  if (totalTokens <= budget) {
+    return { messages: formatted, wasTruncated: false }
+  }
+
+  let tokens = 0
+  if (strategy === 'keep_first') {
+    let cutIndex = formatted.length
+    for (let i = 0; i < costs.length; i++) {
+      tokens += costs[i]
+      if (tokens > budget) {
+        cutIndex = i
+        break
+      }
+    }
+    return { messages: formatted.slice(0, cutIndex), wasTruncated: cutIndex < formatted.length }
+  }
+
+  let cutIndex = 0
+  for (let i = costs.length - 1; i >= 0; i--) {
+    tokens += costs[i]
+    if (tokens > budget) {
+      cutIndex = i + 1
+      break
+    }
+  }
+  return { messages: formatted.slice(cutIndex), wasTruncated: cutIndex > 0 }
+}
+
 /**
  * 昵称匿名化：用 U{senderId} 替代真实昵称
  * 就地修改 messages 的 senderName，返回映射表文本行
diff --git a/electron/main/ai/tools/types.ts b/electron/main/ai/tools/types.ts
index 232fdec5..f11c7114 100644
--- a/electron/main/ai/tools/types.ts
+++ b/electron/main/ai/tools/types.ts
@@ -9,10 +9,14 @@ export type ToolCategory = 'core' | 'analysis'
 
 export type ToolFactory = (context: ToolContext) => AgentTool<any>
 
+export type TruncationStrategy = 'keep_first' | 'keep_last'
+
 export interface ToolRegistryEntry {
   name: string
   factory: ToolFactory
   category: ToolCategory
+  /** Truncation strategy: keep_first = keep the first N entries (search-style tools), keep_last = keep the last N entries (chronological tools) */
+  truncationStrategy?: TruncationStrategy
 }
 
 /** Owner 信息（当前用户在对话中的身份） */
@@ -57,4 +61,6 @@ export interface ToolContext {
   searchContextBefore?: number
   /** 搜索结果上下文：向后取多少条（默认 3） */
   searchContextAfter?: number
+  /** 单次工具返回的最大 token 数（基于 context window 动态计算） */
+  maxToolResultTokens?: number
 }
diff --git a/electron/main/ipc/ai.ts b/electron/main/ipc/ai.ts
index 647b6be9..0b5ed274 100644
--- a/electron/main/ipc/ai.ts
+++ b/electron/main/ipc/ai.ts
@@ -11,6 +11,8 @@ import { getLogsDir } from '../paths'
 import { Agent, type AgentStreamChunk, type SkillContext } from '../ai/agent'
 import { getDefaultGeneralAssistantId } from '../ai/assistant/defaultGeneral'
 import { getActiveConfig, buildPiModel } from '../ai/llm'
+import { checkAndCompress, manualCompress, type CompressionConfig } from '../ai/compression'
+import { countMessagesTokens } from '../ai/tokenizer'
 import * as assistantManager from '../ai/assistant'
 import type { AssistantConfig } from '../ai/assistant/types'
 import * as skillManager from '../ai/skills'
@@ -312,14 +314,23 @@ export function registerAIHandlers({ win }: IpcContext): void {
     async (
       _,
       conversationId: string,
-      role: 'user' | 'assistant',
+      role: aiConversations.AIMessageRole,
       content: string,
       dataKeywords?: string[],
       dataMessageCount?: number,
-      contentBlocks?: aiConversations.ContentBlock[]
+      contentBlocks?: aiConversations.ContentBlock[],
+      tokenUsage?: aiConversations.TokenUsageData
     ) => {
       try {
-        return aiConversations.addMessage(conversationId, role, content, dataKeywords, dataMessageCount, contentBlocks)
+        return aiConversations.addMessage(
+          conversationId,
+          role,
+          content,
+          dataKeywords,
+          dataMessageCount,
+          contentBlocks,
+          tokenUsage
+        )
       } catch (error) {
         console.error('Failed to add AI message:', error)
         throw error
@@ -339,6 +350,18 @@ export function registerAIHandlers({ win }: IpcContext): void {
     }
   })
 
+  /**
+   * Get the cumulative token usage of a conversation
+   */
+  ipcMain.handle('ai:getConversationTokenUsage', async (_, conversationId: string) => {
+    try {
+      return aiConversations.getConversationTokenUsage(conversationId)
+    } catch (error) {
+      console.error('Failed to get conversation token usage:', error)
+      return { promptTokens: 0, completionTokens: 0, totalTokens: 0 } // on failure, report zero usage instead of rejecting the IPC call
+    }
+  })
+
   /**
    * 删除 AI 消息
    */
@@ -1026,7 +1049,6 @@ export function registerAIHandlers({ win }: IpcContext): void {
    * Agent 通过 context.conversationId 从 SQLite 读取对话历史（数据流倒置）
    * @param chatType 聊天类型（'group' | 'private'）
    * @param locale 语言设置（可选，默认 'zh-CN'）
-   * @param maxHistoryRounds 前端用户配置的最大历史轮数（可选，每轮 = user + assistant = 2 条）
    * @param assistantId 助手 ID（可选，传入时从 AssistantManager 获取配置）
    */
   ipcMain.handle(
@@ -1038,10 +1060,10 @@ export function registerAIHandlers({ win }: IpcContext): void {
       context: ToolContext,
       chatType?: 'group' | 'private',
       locale?: string,
-      maxHistoryRounds?: number,
       assistantId?: string,
       skillId?: string | null,
-      enableAutoSkill?: boolean
+      enableAutoSkill?: boolean,
+      compressionConfig?: CompressionConfig
     ) => {
       aiLogger.info('IPC', `Agent stream request received: ${requestId}`, {
         userMessage: userMessage.slice(0, 100),
@@ -1063,14 +1085,61 @@ export function registerAIHandlers({ win }: IpcContext): void {
         }
         const piModel = buildPiModel(activeAIConfig)
 
-        const contextHistoryLimit = maxHistoryRounds ? maxHistoryRounds * 2 : undefined
+        // 上下文压缩前置步骤（在 Agent 创建之前执行）
+        if (compressionConfig?.enabled && context.conversationId) {
+          try {
+            win.webContents.send('agent:streamChunk', {
+              requestId,
+              chunk: {
+                type: 'status',
+                status: {
+                  phase: 'preparing',
+                  round: 0,
+                  toolsUsed: 0,
+                  contextTokens: 0,
+                  totalUsage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+                  updatedAt: Date.now(),
+                } satisfies import('@electron/shared/types').AgentRuntimeStatus,
+              },
+            })
+
+            // 获取助手 systemPrompt 用于 token 计算
+            const tempAssistantConfig = assistantId
+              ? (assistantManager.getAssistantConfig(assistantId) ?? undefined)
+              : undefined
+            const systemPromptForCompression = tempAssistantConfig?.systemPrompt || ''
+
+            const compressionResult = await checkAndCompress(
+              context.conversationId,
+              compressionConfig,
+              systemPromptForCompression,
+              activeAIConfig
+            )
+
+            aiLogger.info('IPC', `Compression result for ${requestId}`, compressionResult)
+
+            if (compressionResult.compressed) {
+              win.webContents.send('agent:streamChunk', {
+                requestId,
+                chunk: {
+                  type: 'status',
+                  status: 'compressed',
+                  content: `Context compressed: ${compressionResult.tokensBefore} → ${compressionResult.tokensAfter} tokens`,
+                },
+              })
+            }
+          } catch (error) {
+            aiLogger.error('IPC', `Compression failed for ${requestId}, continuing without compression`, {
+              error: String(error),
+            })
+          }
+        }
 
         const pp = context.preprocessConfig
         aiLogger.info('IPC', `Agent context: ${requestId}`, {
           model: activeAIConfig.model,
           provider: activeAIConfig.provider,
           baseUrl: activeAIConfig.baseUrl || '(default)',
-          maxHistoryRounds: maxHistoryRounds ?? '(default)',
           maxMessagesLimit: context.maxMessagesLimit,
           hasTimeFilter: !!context.timeFilter,
           mentionedMembersCount: context.mentionedMembers?.length ?? 0,
@@ -1117,11 +1186,18 @@ export function registerAIHandlers({ win }: IpcContext): void {
           }
         }
 
+        // 工具结果 token 预算注入：基于 context window 百分比计算
+        const maxToolResultPercent = compressionConfig?.maxToolResultPercent ?? 50
+        const modelDef = llm.findModelDefinition(activeAIConfig.provider, activeAIConfig.model || '')
+        const resolvedContextWindow = modelDef?.contextWindow || 128000
+        const maxToolResultTokens = Math.floor(resolvedContextWindow * (maxToolResultPercent / 100))
+        const enrichedContext: ToolContext = { ...context, maxToolResultTokens }
+
         const agent = new Agent(
-          context,
+          enrichedContext,
           piModel,
           activeAIConfig.apiKey,
-          { abortSignal: abortController.signal, contextHistoryLimit },
+          { abortSignal: abortController.signal },
           chatType ?? 'group',
           locale ?? 'zh-CN',
           assistantConfig,
@@ -1237,6 +1313,36 @@ export function registerAIHandlers({ win }: IpcContext): void {
     }
   })
 
+  // ==================== Context compression ====================
+
+  ipcMain.handle(
+    'ai:compressContext',
+    async (_, conversationId: string, compressionConfig: CompressionConfig, systemPrompt: string) => {
+      try {
+        const activeAIConfig = getActiveConfig()
+        if (!activeAIConfig) {
+          return { success: false, error: t('llm.notConfigured') } // no active LLM config: report it without attempting compression
+        }
+
+        const result = await manualCompress(conversationId, compressionConfig, systemPrompt, activeAIConfig)
+        return { success: true, result }
+      } catch (error) {
+        aiLogger.error('IPC', 'Manual compression failed', { error: String(error) })
+        return { success: false, error: String(error) } // errors are returned to the renderer as a string, not rethrown
+      }
+    }
+  )
+
+  ipcMain.handle('ai:estimateContextTokens', async (_, conversationId: string) => {
+    try {
+      const history = aiConversations.getHistoryForAgent(conversationId)
+      const tokens = countMessagesTokens(history.map((m) => ({ role: m.role, content: m.content })))
+      return { success: true, tokens, messageCount: history.length }
+    } catch (error) {
+      return { success: false, tokens: 0, error: String(error) }
+    }
+  })
+
   // ==================== Embedding 多配置管理 ====================
 
   /**
diff --git a/electron/preload/apis/ai.ts b/electron/preload/apis/ai.ts
index 546b7e53..15ba212a 100644
--- a/electron/preload/apis/ai.ts
+++ b/electron/preload/apis/ai.ts
@@ -41,15 +41,24 @@ export type ContentBlock =
     }
   | { type: 'skill'; skillId: string; skillName: string }
 
+export type AIMessageRole = 'user' | 'assistant' | 'summary'
+
+export interface TokenUsageData {
+  promptTokens: number
+  completionTokens: number
+  totalTokens: number
+}
+
 export interface AIMessage {
   id: string
   conversationId: string
-  role: 'user' | 'assistant'
+  role: AIMessageRole
   content: string
   timestamp: number
   dataKeywords?: string[]
   dataMessageCount?: number
   contentBlocks?: ContentBlock[]
+  tokenUsage?: TokenUsageData
 }
 
 // LLM API 类型
@@ -479,11 +488,12 @@ export const aiApi = {
    */
   addMessage: (
     conversationId: string,
-    role: 'user' | 'assistant',
+    role: AIMessageRole,
     content: string,
     dataKeywords?: string[],
     dataMessageCount?: number,
-    contentBlocks?: ContentBlock[]
+    contentBlocks?: ContentBlock[],
+    tokenUsage?: TokenUsageData
   ): Promise<AIMessage> => {
     return ipcRenderer.invoke(
       'ai:addMessage',
@@ -492,7 +502,8 @@ export const aiApi = {
       content,
       dataKeywords,
       dataMessageCount,
-      contentBlocks
+      contentBlocks,
+      tokenUsage
     )
   },
 
@@ -503,6 +514,13 @@ export const aiApi = {
     return ipcRenderer.invoke('ai:getMessages', conversationId)
   },
 
+  /**
+   * Get the cumulative token usage of a conversation
+   */
+  getConversationTokenUsage: (conversationId: string): Promise<TokenUsageData> => {
+    return ipcRenderer.invoke('ai:getConversationTokenUsage', conversationId)
+  },
+
   /**
    * 删除 AI 消息
    */
@@ -541,6 +559,36 @@ export const aiApi = {
   cancelToolTest: (testId: string): Promise<{ success: boolean }> => {
     return ipcRenderer.invoke('ai:cancelToolTest', testId)
   },
+
+  estimateContextTokens: (
+    conversationId: string
+  ): Promise<{ success: boolean; tokens: number; messageCount?: number; error?: string }> => {
+    return ipcRenderer.invoke('ai:estimateContextTokens', conversationId)
+  },
+
+  compressContext: (
+    conversationId: string,
+    compressionConfig: {
+      enabled: boolean
+      tokenThresholdPercent: number
+      bufferSizePercent: number
+      compressionModelConfigId?: string
+      maxToolResultPercent?: number
+    },
+    systemPrompt: string
+  ): Promise<{
+    success: boolean
+    result?: {
+      compressed: boolean
+      reason: string
+      tokensBefore?: number
+      tokensAfter?: number
+      error?: string
+    }
+    error?: string
+  }> => {
+    return ipcRenderer.invoke('ai:compressContext', conversationId, compressionConfig, systemPrompt)
+  },
 }
 
 // ==================== LLM API ====================
@@ -913,7 +961,6 @@ export const agentApi = {
    * Agent 通过 context.conversationId 从后端 SQLite 读取对话历史
    * @param chatType 聊天类型（'group' | 'private'）
    * @param locale 语言设置（可选，默认 'zh-CN'）
-   * @param maxHistoryRounds 最大历史轮数（可选，每轮 = user + assistant = 2 条）
    * @returns 返回 { requestId, promise }，requestId 可用于中止请求
    */
   runStream: (
@@ -922,10 +969,16 @@ export const agentApi = {
     onChunk?: (chunk: AgentStreamChunk) => void,
     chatType?: 'group' | 'private',
     locale?: string,
-    maxHistoryRounds?: number,
     assistantId?: string,
     skillId?: string | null,
-    enableAutoSkill?: boolean
+    enableAutoSkill?: boolean,
+    compressionConfig?: {
+      enabled: boolean
+      tokenThresholdPercent: number
+      bufferSizePercent: number
+      compressionModelConfigId?: string
+      maxToolResultPercent?: number
+    }
   ): {
     requestId: string
     promise: Promise<{ success: boolean; result?: AgentResult; error?: SerializedErrorInfo }>
@@ -1013,10 +1066,10 @@ export const agentApi = {
           sanitizedContext,
           chatType,
           locale,
-          maxHistoryRounds,
           assistantId,
           skillId,
-          enableAutoSkill
+          enableAutoSkill,
+          compressionConfig
         )
         .then((result) => {
           console.log('[preload] Agent invoke 返回:', result)
diff --git a/electron/preload/index.d.ts b/electron/preload/index.d.ts
index 6a6d4088..e6a37fa1 100644
--- a/electron/preload/index.d.ts
+++ b/electron/preload/index.d.ts
@@ -305,15 +305,24 @@ type AIContentBlock =
     }
   | { type: 'skill'; skillId: string; skillName: string }
 
+type AIMessageRole = 'user' | 'assistant' | 'summary'
+
+interface AITokenUsageData {
+  promptTokens: number
+  completionTokens: number
+  totalTokens: number
+}
+
 interface AIMessage {
   id: string
   conversationId: string
-  role: 'user' | 'assistant'
+  role: AIMessageRole
   content: string
   timestamp: number
   dataKeywords?: string[]
   dataMessageCount?: number
   contentBlocks?: AIContentBlock[]
+  tokenUsage?: AITokenUsageData
 }
 
 interface AiApi {
@@ -374,10 +383,12 @@ interface AiApi {
     content: string,
     dataKeywords?: string[],
     dataMessageCount?: number,
-    contentBlocks?: AIContentBlock[]
+    contentBlocks?: AIContentBlock[],
+    tokenUsage?: AITokenUsageData
   ) => Promise<AIMessage>
   getMessages: (conversationId: string) => Promise<AIMessage[]>
   getMessages: (conversationId: string) => Promise<AIMessage[]>
+  getConversationTokenUsage: (conversationId: string) => Promise<AITokenUsageData>
   deleteMessage: (messageId: string) => Promise<boolean>
   showAiLogFile: () => Promise<{ success: boolean; path?: string; error?: string }>
   getDefaultDesensitizeRules: (locale: string) => Promise<DesensitizeRule[]>
@@ -390,6 +401,30 @@ interface AiApi {
     sessionId: string
   ) => Promise<ToolExecuteResult>
   cancelToolTest: (testId: string) => Promise<{ success: boolean }>
+  estimateContextTokens: (
+    conversationId: string
+  ) => Promise<{ success: boolean; tokens: number; messageCount?: number; error?: string }>
+  compressContext: (
+    conversationId: string,
+    compressionConfig: {
+      enabled: boolean
+      tokenThresholdPercent: number
+      bufferSizePercent: number
+      compressionModelConfigId?: string
+      maxToolResultPercent?: number
+    },
+    systemPrompt: string
+  ) => Promise<{
+    success: boolean
+    result?: {
+      compressed: boolean
+      reason: string
+      tokensBefore?: number
+      tokensAfter?: number
+      error?: string
+    }
+    error?: string
+  }>
   // 自定义筛选（支持分页）
   filterMessagesWithContext: (
     sessionId: string,
@@ -775,10 +810,16 @@ interface AgentApi {
     onChunk?: (chunk: AgentStreamChunk) => void,
     chatType?: 'group' | 'private',
     locale?: string,
-    maxHistoryRounds?: number,
     assistantId?: string,
     skillId?: string | null,
-    enableAutoSkill?: boolean
+    enableAutoSkill?: boolean,
+    compressionConfig?: {
+      enabled: boolean
+      tokenThresholdPercent: number
+      bufferSizePercent: number
+      compressionModelConfigId?: string
+      maxToolResultPercent?: number
+    }
   ) => { requestId: string; promise: Promise<{ success: boolean; result?: AgentResult; error?: SerializedErrorInfo }> }
   abort: (requestId: string) => Promise<{ success: boolean; error?: string }>
 }
diff --git a/package.json b/package.json
index f8616a41..e10b705c 100644
--- a/package.json
+++ b/package.json
@@ -53,6 +53,7 @@
     "fastify": "^5.8.4",
     "gray-matter": "^4.0.3",
     "i18next": "^25.8.5",
+    "js-tiktoken": "^1.0.21",
     "markdown-it": "^14.1.0",
     "stream-json": "^1.9.1",
     "vue-i18n": "^11.2.8"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 9680535a..d2627ff1 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -53,6 +53,9 @@ importers:
       i18next:
         specifier: ^25.8.5
         version: 25.8.5(typescript@5.9.3)
+      js-tiktoken:
+        specifier: ^1.0.21
+        version: 1.0.21
       markdown-it:
         specifier: ^14.1.0
         version: 14.1.0
@@ -3235,6 +3238,9 @@ packages:
     resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==}
     hasBin: true
 
+  js-tiktoken@1.0.21:
+    resolution: {integrity: sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==}
+
   js-tokens@4.0.0:
     resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}
 
@@ -8913,6 +8919,10 @@ snapshots:
 
   jiti@2.6.1: {}
 
+  js-tiktoken@1.0.21:
+    dependencies:
+      base64-js: 1.5.1
+
   js-tokens@4.0.0: {}
 
   js-tokens@9.0.1: {}
diff --git a/src/components/AIChat/ChatExplorer.vue b/src/components/AIChat/ChatExplorer.vue
index d6bf8d51..1c47cc99 100644
--- a/src/components/AIChat/ChatExplorer.vue
+++ b/src/components/AIChat/ChatExplorer.vue
@@ -91,6 +91,28 @@ const {
 // Store
 const promptStore = usePromptStore()
 
+// Context token count computed precisely by the backend tokenizer
+const estimatedContextTokens = ref(0)
+
+watch(
+  () => currentConversationId.value,
+  async (convId) => {
+    if (!convId) {
+      estimatedContextTokens.value = 0 // no active conversation: nothing to count
+      return
+    }
+    try {
+      const result = await window.aiApi.estimateContextTokens(convId)
+      if (result.success) { // an unsuccessful result leaves the previous value in place
+        estimatedContextTokens.value = result.tokens
+      }
+    } catch {
+      estimatedContextTokens.value = 0
+    }
+  },
+  { immediate: true }
+)
+
 // 当前选中助手的预设问题
 const currentPresetQuestions = computed(() => {
   return assistantStore.selectedAssistant?.presetQuestions ?? []
@@ -461,6 +483,7 @@ watch(
                 :session-token-usage="sessionTokenUsage"
                 :agent-status="agentStatus"
                 :current-conversation-id="currentConversationId"
+                :estimated-context-tokens="estimatedContextTokens"
               />
             </div>
           </div>
diff --git a/src/components/AIChat/chat/ChatMessage.vue b/src/components/AIChat/chat/ChatMessage.vue
index eac60692..e505e2a2 100644
--- a/src/components/AIChat/chat/ChatMessage.vue
+++ b/src/components/AIChat/chat/ChatMessage.vue
@@ -13,7 +13,7 @@ const toast = useToast()
 
 // Props
 const props = defineProps<{
-  role: 'user' | 'assistant'
+  role: 'user' | 'assistant' | 'summary'
   content: string
   timestamp: number
   isStreaming?: boolean
@@ -30,6 +30,7 @@ const formattedTime = computed(() => {
 
 // 是否是用户消息
 const isUser = computed(() => props.role === 'user')
+const isSummary = computed(() => props.role === 'summary')
 
 // 创建 markdown-it 实例
 const md = new MarkdownIt({
@@ -301,11 +302,37 @@ async function handleCopyMarkdown() {
 </script>
 
 <template>
-  <div class="flex items-start gap-3" :class="[isUser ? 'flex-row-reverse' : '']">
+  <div class="flex items-start gap-3" :class="[isUser ? 'flex-row-reverse' : '', isSummary ? 'justify-center' : '']">
     <!-- 消息内容 -->
-    <div class="max-w-[85%] min-w-0">
+    <div :class="[isSummary ? 'w-full min-w-0' : 'max-w-[85%] min-w-0']">
+      <!-- Summary 消息：可折叠的上下文摘要 -->
+      <template v-if="isSummary">
+        <details
+          class="w-full rounded-xl border border-purple-200 bg-purple-50/50 dark:border-purple-800/50 dark:bg-purple-900/20"
+        >
+          <summary
+            class="flex cursor-pointer select-none items-center gap-2 px-4 py-2.5 text-sm font-medium text-purple-700 transition-colors hover:text-purple-900 dark:text-purple-300 dark:hover:text-purple-100"
+          >
+            <UIcon name="i-heroicons-archive-box-arrow-down" class="h-4 w-4 shrink-0" />
+            <span>{{ t('ai.chat.message.summary.label') }}</span>
+            <span class="ml-auto text-xs font-normal text-purple-400 dark:text-purple-500">
+              {{ t('ai.chat.message.summary.expand') }}
+            </span>
+          </summary>
+          <div class="border-t border-purple-200/50 px-4 py-3 dark:border-purple-800/30">
+            <div
+              class="prose prose-sm dark:prose-invert max-w-none text-sm leading-relaxed text-gray-700 dark:text-gray-300"
+              v-html="renderedContent"
+            />
+            <p class="mt-3 text-xs italic text-purple-400 dark:text-purple-500">
+              {{ t('ai.chat.message.summary.info') }}
+            </p>
+          </div>
+        </details>
+      </template>
+
       <!-- 用户消息：简单气泡 -->
-      <template v-if="isUser">
+      <template v-else-if="isUser">
         <div class="rounded-3xl bg-primary-50 px-5 py-3 text-gray-900 dark:bg-primary-500/50 dark:text-gray-100">
           <div class="prose prose-sm dark:prose-invert max-w-none leading-relaxed" v-html="renderedContent" />
         </div>
@@ -438,8 +465,12 @@ async function handleCopyMarkdown() {
         </div>
       </template>
 
-      <!-- 时间戳 + 操作按钮 -->
-      <div class="mt-1 flex items-center gap-2 px-1" :class="[isUser ? 'flex-row-reverse' : '']">
+      <!-- 时间戳 + 操作按钮（summary 消息和流式输出中不显示） -->
+      <div
+        v-if="!isSummary && !isStreaming"
+        class="mt-1 flex items-center gap-2 px-1"
+        :class="[isUser ? 'flex-row-reverse' : '']"
+      >
         <span class="text-xs text-gray-400">{{ formattedTime }}</span>
         <UTooltip :text="t('ai.chat.message.copy.tooltip')" class="no-capture">
           <UButton
diff --git a/src/components/AIChat/chat/ChatStatusBar.vue b/src/components/AIChat/chat/ChatStatusBar.vue
index 56f2ba3d..48df38df 100644
--- a/src/components/AIChat/chat/ChatStatusBar.vue
+++ b/src/components/AIChat/chat/ChatStatusBar.vue
@@ -18,6 +18,7 @@ const props = defineProps<{
   sessionTokenUsage: { totalTokens: number }
   agentStatus?: AgentRuntimeStatus | null
   currentConversationId?: string | null
+  estimatedContextTokens?: number
 }>()
 
 // Store
@@ -76,6 +77,39 @@ function formatCompactNumber(value: number): string {
 const totalTokenUsageText = computed(() => formatNumber(props.sessionTokenUsage.totalTokens))
 const totalTokenUsageCompactText = computed(() => formatCompactNumber(props.sessionTokenUsage.totalTokens))
 
+const contextTokens = computed(() => {
+  if (props.agentStatus?.contextTokens) return props.agentStatus.contextTokens
+  if (props.estimatedContextTokens && props.estimatedContextTokens > 0) return props.estimatedContextTokens
+  return 0
+})
+
+const modelContextWindow = computed(() => {
+  if (!activeConfig.value) return 128000
+  const model = llmStore.getModelById(activeConfig.value.provider, activeConfig.value.model)
+  return model?.contextWindow ?? 128000
+})
+
+const contextUsagePercent = computed(() => {
+  if (contextTokens.value <= 0 || modelContextWindow.value <= 0) return 0
+  return Math.min(100, Math.round((contextTokens.value / modelContextWindow.value) * 100))
+})
+
+const contextBarColor = computed(() => {
+  const pct = contextUsagePercent.value
+  if (pct >= 80) return 'bg-red-500'
+  if (pct >= 60) return 'bg-amber-500'
+  return 'bg-emerald-500'
+})
+
+const contextBarTooltip = computed(() => {
+  const lines = []
+  lines.push(
+    `${t('ai.chat.statusBar.agent.contextTokens')}: ${formatNumber(contextTokens.value)} / ${formatNumber(modelContextWindow.value)} (${contextUsagePercent.value}%)`
+  )
+  lines.push(`${t('ai.chat.statusBar.tokenUsageTitle')}: ${totalTokenUsageText.value}`)
+  return lines.join('\n')
+})
+
 const agentCompactTitle = computed(() => {
   if (!props.agentStatus) return ''
   return [
@@ -163,6 +197,51 @@ async function handleExportConversation() {
   }
 }
 
+// Manually compress the conversation context
+const isCompressing = ref(false)
+
+async function handleManualCompress() {
+  if (isCompressing.value || !props.currentConversationId) return
+
+  const compressionConfig = aiGlobalSettings.value.contextCompression
+  if (!compressionConfig) return
+
+  isCompressing.value = true
+  try {
+    const result = await window.aiApi.compressContext(
+      props.currentConversationId,
+      {
+        enabled: true, // always sent as true for a manual run; the stored enabled flag is not read here
+        tokenThresholdPercent: compressionConfig.tokenThresholdPercent ?? 75,
+        bufferSizePercent: compressionConfig.bufferSizePercent ?? 20,
+        compressionModelConfigId: compressionConfig.compressionModelConfigId,
+      },
+      '' // an empty system prompt is passed to the backend
+    )
+
+    if (result.success && result.result) {
+      if (result.result.compressed) {
+        toast.success(t('ai.chat.statusBar.compress.success'), {
+          description: t('ai.chat.statusBar.compress.successDesc', {
+            before: result.result.tokensBefore ?? '?',
+            after: result.result.tokensAfter ?? '?',
+          }),
+        })
+      } else {
+        toast.warn(t('ai.chat.statusBar.compress.skipped'), {
+          description: t('ai.chat.statusBar.compress.skippedDesc'),
+        })
+      }
+    } else {
+      toast.fail(t('ai.chat.statusBar.compress.failed'), { description: result.error })
+    }
+  } catch (error) {
+    toast.fail(t('ai.chat.statusBar.compress.failed'), { description: String(error) })
+  } finally {
+    isCompressing.value = false // clear the busy flag whether the call succeeded or failed
+  }
+}
+
 // 打开当前 AI 日志文件并定位到文件
 async function openAiLogFile() {
   if (isOpeningLog.value) return
@@ -266,13 +345,30 @@ async function openAiLogFile() {
         </span>
       </div>
 
-      <div
-        class="hidden shrink-0 items-center gap-1 rounded-md px-1.5 py-1 text-xs text-gray-400 dark:text-gray-500 md:flex"
-        :title="t('ai.chat.statusBar.tokenUsageTitle')"
-      >
-        <UIcon name="i-heroicons-circle-stack" class="h-3.5 w-3.5" />
-        <span>{{ totalTokenUsageCompactText }}</span>
-      </div>
+      <!-- Context 进度条 -->
+      <UTooltip v-if="contextTokens > 0" :ui="{ content: 'h-auto py-1.5' }">
+        <div
+          class="hidden shrink-0 items-center gap-1.5 rounded-md px-1.5 py-1 text-xs text-gray-400 dark:text-gray-500 md:flex"
+        >
+          <div class="h-1.5 w-10 overflow-hidden rounded-full bg-gray-200 dark:bg-gray-700">
+            <div
+              class="h-full rounded-full transition-all duration-300"
+              :class="contextBarColor"
+              :style="{ width: `${contextUsagePercent}%` }"
+            />
+          </div>
+          <span class="text-[10px]">{{ contextUsagePercent }}%</span>
+        </div>
+        <template #content>
+          <div class="space-y-0.5 whitespace-nowrap text-xs">
+            <div>
+              {{ t('ai.chat.statusBar.agent.contextTokens') }}: {{ formatCompactNumber(contextTokens) }} /
+              {{ formatCompactNumber(modelContextWindow) }}
+            </div>
+            <div>{{ t('ai.chat.statusBar.tokenUsageTitle') }}: {{ totalTokenUsageCompactText }}</div>
+          </div>
+        </template>
+      </UTooltip>
 
       <!-- 消息条数限制（点击跳转设置） -->
       <button
@@ -294,6 +390,23 @@ async function openAiLogFile() {
         <UIcon name="i-heroicons-arrow-down-tray" class="h-3.5 w-3.5" />
         <span class="hidden xl:inline">{{ t('ai.chat.statusBar.export.label') }}</span>
       </button>
+      <!-- 手动压缩按钮 -->
+      <button
+        v-if="aiGlobalSettings.contextCompression?.enabled"
+        class="flex shrink-0 items-center gap-1 rounded-md px-1.5 py-1 text-xs text-gray-400 transition-colors hover:bg-gray-100 hover:text-gray-600 disabled:cursor-not-allowed disabled:opacity-60 dark:hover:bg-gray-800 dark:hover:text-gray-300"
+        :title="t('ai.chat.statusBar.compress.title')"
+        :disabled="isCompressing || !currentConversationId"
+        @click="handleManualCompress"
+      >
+        <UIcon
+          name="i-heroicons-archive-box-arrow-down"
+          class="h-3.5 w-3.5"
+          :class="[isCompressing ? 'animate-pulse' : '']"
+        />
+        <span class="hidden xl:inline">
+          {{ isCompressing ? t('ai.chat.statusBar.compress.compressing') : t('ai.chat.statusBar.compress.label') }}
+        </span>
+      </button>
       <!-- 日志按钮 -->
       <button
         class="flex shrink-0 items-center gap-1 rounded-md px-1.5 py-1 text-xs text-gray-400 transition-colors hover:bg-gray-100 hover:text-gray-600 disabled:cursor-not-allowed disabled:opacity-60 dark:hover:bg-gray-800 dark:hover:text-gray-300"
diff --git a/src/components/common/Settings/AI/AIPromptConfigTab.vue b/src/components/common/Settings/AI/AIPromptConfigTab.vue
index dce7986b..cfcc2f1e 100644
--- a/src/components/common/Settings/AI/AIPromptConfigTab.vue
+++ b/src/components/common/Settings/AI/AIPromptConfigTab.vue
@@ -24,16 +24,6 @@ const globalMaxMessages = computed({
   },
 })
 
-// AI上下文限制
-const globalMaxHistoryRounds = computed({
-  get: () => aiGlobalSettings.value.maxHistoryRounds ?? 10,
-  set: (val: number) => {
-    const clampedVal = Math.max(1, Math.min(50, val || 10))
-    promptStore.updateAIGlobalSettings({ maxHistoryRounds: clampedVal })
-    emit('config-changed')
-  },
-})
-
 // 导出格式选项（AI 对话）
 const exportFormatTabs = computed(() => [
   { label: 'Markdown', value: 'markdown' },
@@ -89,6 +79,50 @@ const searchContextAfter = computed({
     emit('config-changed')
   },
 })
+
+// Context compression settings
+const compressionEnabled = computed({
+  get: () => aiGlobalSettings.value.contextCompression?.enabled ?? false,
+  set: (val: boolean) => {
+    promptStore.updateAIGlobalSettings({
+      contextCompression: { ...aiGlobalSettings.value.contextCompression, enabled: val },
+    })
+    emit('config-changed')
+  },
+})
+
+const compressionThreshold = computed({
+  get: () => aiGlobalSettings.value.contextCompression?.tokenThresholdPercent ?? 75,
+  set: (val: number) => {
+    const clampedVal = Math.max(30, Math.min(95, val || 75)) // clamp to 30-95; a falsy input falls back to 75
+    promptStore.updateAIGlobalSettings({
+      contextCompression: { ...aiGlobalSettings.value.contextCompression, tokenThresholdPercent: clampedVal },
+    })
+    emit('config-changed')
+  },
+})
+
+const compressionBuffer = computed({
+  get: () => aiGlobalSettings.value.contextCompression?.bufferSizePercent ?? 20,
+  set: (val: number) => {
+    const clampedVal = Math.max(5, Math.min(50, val || 20)) // clamp to 5-50; a falsy input falls back to 20
+    promptStore.updateAIGlobalSettings({
+      contextCompression: { ...aiGlobalSettings.value.contextCompression, bufferSizePercent: clampedVal },
+    })
+    emit('config-changed')
+  },
+})
+
+const maxToolResultPercent = computed({
+  get: () => aiGlobalSettings.value.contextCompression?.maxToolResultPercent ?? 50,
+  set: (val: number) => {
+    const clampedVal = Math.max(10, Math.min(60, val || 50)) // clamp to 10-60; a falsy input falls back to 50
+    promptStore.updateAIGlobalSettings({
+      contextCompression: { ...aiGlobalSettings.value.contextCompression, maxToolResultPercent: clampedVal },
+    })
+    emit('config-changed')
+  },
+})
 </script>
 
 <template>
@@ -113,19 +147,6 @@ const searchContextAfter = computed({
           <UInputNumber v-model="globalMaxMessages" :min="0" :max="50000" class="w-30" />
         </div>
 
-        <!-- AI上下文限制 -->
-        <div class="flex items-center justify-between">
-          <div class="flex-1 pr-4">
-            <p class="text-sm font-medium text-gray-900 dark:text-white">
-              {{ t('settings.aiPrompt.maxHistory.title') }}
-            </p>
-            <p class="text-xs text-gray-500 dark:text-gray-400">
-              {{ t('settings.aiPrompt.maxHistory.description') }}
-            </p>
-          </div>
-          <UInputNumber v-model="globalMaxHistoryRounds" :min="1" :max="50" class="w-30" />
-        </div>
-
         <!-- 搜索上下文窗口 -->
         <div>
           <div class="mb-2">
@@ -175,6 +196,78 @@ const searchContextAfter = computed({
       </div>
     </div>
 
+    <!-- Context compression settings -->
+    <div>
+      <h4 class="mb-3 flex items-center gap-2 text-sm font-semibold text-gray-900 dark:text-white">
+        <UIcon name="i-heroicons-archive-box-arrow-down" class="h-4 w-4 text-purple-500" />
+        {{ t('settings.aiPrompt.compression.title') }}
+      </h4>
+      <div class="space-y-4 rounded-lg border border-gray-200 bg-gray-50 p-4 dark:border-gray-700 dark:bg-gray-800/50">
+        <!-- Compression toggle -->
+        <div class="flex items-center justify-between">
+          <div class="flex-1 pr-4">
+            <p class="text-sm font-medium text-gray-900 dark:text-white">
+              {{ t('settings.aiPrompt.compression.enable') }}
+            </p>
+            <p class="text-xs text-gray-500 dark:text-gray-400">
+              {{ t('settings.aiPrompt.compression.enableDesc') }}
+            </p>
+          </div>
+          <USwitch v-model="compressionEnabled" />
+        </div>
+
+        <!-- Tool result limit (always shown, independent of the compression toggle) -->
+        <div class="flex items-center justify-between">
+          <div class="flex-1 pr-4">
+            <p class="text-sm font-medium text-gray-900 dark:text-white">
+              {{ t('settings.aiPrompt.compression.maxToolResultPercent') }}
+            </p>
+            <p class="text-xs text-gray-500 dark:text-gray-400">
+              {{ t('settings.aiPrompt.compression.maxToolResultPercentDesc') }}
+            </p>
+          </div>
+          <div class="flex items-center gap-1">
+            <UInputNumber v-model="maxToolResultPercent" :min="10" :max="60" class="w-24" />
+            <span class="text-xs text-gray-400">%</span>
+          </div>
+        </div>
+
+        <template v-if="compressionEnabled">
+          <!-- Compression threshold -->
+          <div class="flex items-center justify-between">
+            <div class="flex-1 pr-4">
+              <p class="text-sm font-medium text-gray-900 dark:text-white">
+                {{ t('settings.aiPrompt.compression.threshold') }}
+              </p>
+              <p class="text-xs text-gray-500 dark:text-gray-400">
+                {{ t('settings.aiPrompt.compression.thresholdDesc') }}
+              </p>
+            </div>
+            <div class="flex items-center gap-1">
+              <UInputNumber v-model="compressionThreshold" :min="30" :max="95" class="w-24" />
+              <span class="text-xs text-gray-400">%</span>
+            </div>
+          </div>
+
+          <!-- Buffer size -->
+          <div class="flex items-center justify-between">
+            <div class="flex-1 pr-4">
+              <p class="text-sm font-medium text-gray-900 dark:text-white">
+                {{ t('settings.aiPrompt.compression.buffer') }}
+              </p>
+              <p class="text-xs text-gray-500 dark:text-gray-400">
+                {{ t('settings.aiPrompt.compression.bufferDesc') }}
+              </p>
+            </div>
+            <div class="flex items-center gap-1">
+              <UInputNumber v-model="compressionBuffer" :min="5" :max="50" class="w-24" />
+              <span class="text-xs text-gray-400">%</span>
+            </div>
+          </div>
+        </template>
+      </div>
+    </div>
+
     <!-- 导出设置 -->
     <div>
       <h4 class="mb-3 flex items-center gap-2 text-sm font-semibold text-gray-900 dark:text-white">
diff --git a/src/i18n/locales/en-US/ai.json b/src/i18n/locales/en-US/ai.json
index 81f5250f..ace53246 100644
--- a/src/i18n/locales/en-US/ai.json
+++ b/src/i18n/locales/en-US/ai.json
@@ -82,6 +82,12 @@
         "unanswered_messages": "Unanswered Messages"
       },
       "generating": "Generating response...",
+      "summary": {
+        "label": "Context Summary",
+        "expand": "Expand summary",
+        "collapse": "Collapse summary",
+        "info": "Previous conversation has been compressed into a summary to save context space"
+      },
       "think": {
         "labels": {
           "think": "Thinking",
@@ -157,7 +163,7 @@
           "error": "Err"
         }
       },
-      "tokenUsageTitle": "Total token usage in this session",
+      "tokenUsageTitle": "Total token usage",
       "export": {
         "label": "Export",
         "title": "Export current conversation"
@@ -168,6 +174,16 @@
         "openFailed": "Failed to open log",
         "openFailedDesc": "Please try again later"
       },
+      "compress": {
+        "label": "Compress",
+        "title": "Manually compress context",
+        "compressing": "Compressing...",
+        "success": "Context compressed",
+        "successDesc": "{before} → {after} tokens",
+        "skipped": "Compression skipped",
+        "skippedDesc": "Not enough messages to compress",
+        "failed": "Compression failed"
+      },
       "status": {
         "connected": "AI Connected",
         "notConfigured": "Please configure AI service in Settings"
diff --git a/src/i18n/locales/en-US/settings.json b/src/i18n/locales/en-US/settings.json
index 212f3f7e..ce9bdf67 100644
--- a/src/i18n/locales/en-US/settings.json
+++ b/src/i18n/locales/en-US/settings.json
@@ -273,10 +273,6 @@
       "title": "Max Messages per Request",
       "description": "Max number of messages sent to AI per request. Higher values consume more tokens but provide more accurate context (recommended: 2000)."
     },
-    "maxHistory": {
-      "title": "Context Limit",
-      "description": "Number of recent conversation rounds to keep (1 round = User + AI). Prevents excessive token usage."
-    },
     "searchContext": {
       "title": "Search Context Window",
       "description": "Automatically include surrounding messages for each search hit to help AI understand the context. Set to 0 to disable",
@@ -292,6 +288,17 @@
       "title": "SQL Lab Export Format",
       "description": "File format for exporting SQL query results"
     },
+    "compression": {
+      "title": "Context Compression",
+      "enable": "Auto-compress context",
+      "enableDesc": "Automatically compress long conversation history into summaries to reduce token usage",
+      "threshold": "Compression threshold",
+      "thresholdDesc": "Trigger compression when context tokens reach this percentage of the model's context window",
+      "buffer": "Buffer size",
+      "bufferDesc": "Percentage of recent messages to keep uncompressed",
+      "maxToolResultPercent": "Tool result limit",
+      "maxToolResultPercentDesc": "Maximum data percentage per tool call sent to AI (based on model context window). Excess results are intelligently truncated"
+    },
     "presets": {
       "title": "System Prompts",
       "add": "Add Preset",
diff --git a/src/i18n/locales/ja-JP/ai.json b/src/i18n/locales/ja-JP/ai.json
index 1eea963c..73552131 100644
--- a/src/i18n/locales/ja-JP/ai.json
+++ b/src/i18n/locales/ja-JP/ai.json
@@ -82,6 +82,12 @@
         "unanswered_messages": "未返信メッセージ"
       },
       "generating": "回答を作成中...",
+      "summary": {
+        "label": "コンテキスト要約",
+        "expand": "要約を展開",
+        "collapse": "要約を折りたたむ",
+        "info": "以前の会話がコンテキスト節約のため要約に圧縮されました"
+      },
       "think": {
         "labels": {
           "think": "思考",
@@ -157,7 +163,7 @@
           "error": "エラー"
         }
       },
-      "tokenUsageTitle": "この会話で使用した Token 合計",
+      "tokenUsageTitle": "累計 Token 使用量",
       "export": {
         "label": "エクスポート",
         "title": "現在のチャットをエクスポート"
@@ -168,6 +174,16 @@
         "openFailed": "ログを開けませんでした",
         "openFailedDesc": "しばらくしてから再度お試しください"
       },
+      "compress": {
+        "label": "圧縮",
+        "title": "手動でコンテキストを圧縮",
+        "compressing": "圧縮中...",
+        "success": "コンテキストが圧縮されました",
+        "successDesc": "{before} → {after} トークン",
+        "skipped": "圧縮をスキップ",
+        "skippedDesc": "圧縮に十分なメッセージがありません",
+        "failed": "圧縮に失敗しました"
+      },
       "status": {
         "connected": "AI 接続中",
         "notConfigured": "グローバル設定で AI サービスを設定してください"
diff --git a/src/i18n/locales/ja-JP/settings.json b/src/i18n/locales/ja-JP/settings.json
index 94006248..8035a9b6 100644
--- a/src/i18n/locales/ja-JP/settings.json
+++ b/src/i18n/locales/ja-JP/settings.json
@@ -273,10 +273,6 @@
       "title": "送信メッセージ数の上限",
       "description": "1 回のリクエストで AI に送る最大メッセージ数です。増やすほど Token 消費は増えますが、分析精度も上がります（初めてなら 2000 推奨）"
     },
-    "maxHistory": {
-      "title": "AI コンテキスト制限",
-      "description": "会話ごとに保持する直近のやり取り数です（1 往復 = ユーザーの質問 + AI の回答）。文脈が長くなりすぎて Token を消費するのを防ぎます"
-    },
     "searchContext": {
       "title": "検索コンテキストウィンドウ",
       "description": "検索ヒット時に前後の会話コンテキストを自動的に含めることで、AI がメッセージの背景を理解しやすくなります。0 に設定するとコンテキストなし",
@@ -292,6 +288,17 @@
       "title": "SQL Lab エクスポート形式",
       "description": "SQL クエリ結果をエクスポートする際のファイル形式"
     },
+    "compression": {
+      "title": "コンテキスト圧縮",
+      "enable": "コンテキストの自動圧縮",
+      "enableDesc": "会話が長くなった際に履歴を自動的に要約に圧縮し、トークン消費を削減します",
+      "threshold": "圧縮閾値",
+      "thresholdDesc": "コンテキストトークンがモデルウィンドウのこの割合に達したら圧縮を開始",
+      "buffer": "バッファサイズ",
+      "bufferDesc": "圧縮せずに保持する最新メッセージの割合",
+      "maxToolResultPercent": "ツール結果の上限",
+      "maxToolResultPercentDesc": "1回のツール呼び出しでAIに送信される最大データ割合（モデルのコンテキストウィンドウに基づいて計算）。超過分はスマートに切り捨てられます"
+    },
     "presets": {
       "title": "システムプロンプト",
       "add": "プリセットを追加",
diff --git a/src/i18n/locales/zh-CN/ai.json b/src/i18n/locales/zh-CN/ai.json
index d81f85c3..7d2d518b 100644
--- a/src/i18n/locales/zh-CN/ai.json
+++ b/src/i18n/locales/zh-CN/ai.json
@@ -82,6 +82,12 @@
         "unanswered_messages": "未回复消息"
       },
       "generating": "正在生成回复...",
+      "summary": {
+        "label": "上下文摘要",
+        "expand": "展开摘要",
+        "collapse": "收起摘要",
+        "info": "以上对话已被压缩为摘要以节省上下文空间"
+      },
       "think": {
         "labels": {
           "think": "已思考",
@@ -157,7 +163,7 @@
           "error": "错误"
         }
       },
-      "tokenUsageTitle": "本次会话累计 Token 使用量",
+      "tokenUsageTitle": "累计 Token 使用量",
       "export": {
         "label": "导出",
         "title": "导出当前对话"
@@ -168,6 +174,16 @@
         "openFailed": "打开日志失败",
         "openFailedDesc": "请稍后重试"
       },
+      "compress": {
+        "label": "压缩",
+        "title": "手动压缩上下文",
+        "compressing": "正在压缩...",
+        "success": "上下文已压缩",
+        "successDesc": "{before} → {after} tokens",
+        "skipped": "跳过压缩",
+        "skippedDesc": "消息不足，无需压缩",
+        "failed": "压缩失败"
+      },
       "status": {
         "connected": "AI 已连接",
         "notConfigured": "请在全局设置中配置 AI 服务"
diff --git a/src/i18n/locales/zh-CN/settings.json b/src/i18n/locales/zh-CN/settings.json
index 109f0a3d..4ef25903 100644
--- a/src/i18n/locales/zh-CN/settings.json
+++ b/src/i18n/locales/zh-CN/settings.json
@@ -273,10 +273,6 @@
       "title": "发送条数限制",
       "description": "每次提交给 AI 的最大消息数，数值越大 Token 消耗越多，分析也更准确（新手建议2000）"
     },
-    "maxHistory": {
-      "title": "AI上下文限制",
-      "description": "每次对话保留最近的对话轮数（1轮 = 用户提问 + AI回复），防止上下文过长消耗 Token"
-    },
     "searchContext": {
       "title": "搜索上下文窗口",
       "description": "搜索命中消息时自动携带前后的对话上下文，帮助 AI 理解消息背景。设为 0 则不携带上下文",
@@ -292,6 +288,17 @@
       "title": "SQL Lab 导出格式",
       "description": "导出 SQL 查询结果时使用的文件格式"
     },
+    "compression": {
+      "title": "上下文压缩",
+      "enable": "自动压缩上下文",
+      "enableDesc": "对话过长时自动压缩历史为摘要，保留关键信息的同时降低 Token 消耗",
+      "threshold": "压缩阈值",
+      "thresholdDesc": "上下文 Token 达到模型窗口的百分比时触发压缩",
+      "buffer": "缓冲区大小",
+      "bufferDesc": "保留最近消息原文的比例，不参与压缩",
+      "maxToolResultPercent": "工具结果上限",
+      "maxToolResultPercentDesc": "单次工具返回给 AI 的最大数据量占比（基于模型上下文窗口计算），超出部分会被智能截断"
+    },
     "presets": {
       "title": "系统提示词",
       "add": "添加预设",
diff --git a/src/i18n/locales/zh-TW/ai.json b/src/i18n/locales/zh-TW/ai.json
index 201241a8..73ee7378 100644
--- a/src/i18n/locales/zh-TW/ai.json
+++ b/src/i18n/locales/zh-TW/ai.json
@@ -82,6 +82,12 @@
         "unanswered_messages": "未回覆訊息"
       },
       "generating": "正在產生回覆...",
+      "summary": {
+        "label": "上下文摘要",
+        "expand": "展開摘要",
+        "collapse": "收起摘要",
+        "info": "以上對話已被壓縮為摘要以節省上下文空間"
+      },
       "think": {
         "labels": {
           "think": "已思考",
@@ -157,7 +163,7 @@
           "error": "錯誤"
         }
       },
-      "tokenUsageTitle": "本次對話累計 Token 用量",
+      "tokenUsageTitle": "累計 Token 用量",
       "export": {
         "label": "匯出",
         "title": "匯出當前對話"
@@ -168,6 +174,16 @@
         "openFailed": "開啟日誌失敗",
         "openFailedDesc": "請稍後重試"
       },
+      "compress": {
+        "label": "壓縮",
+        "title": "手動壓縮上下文",
+        "compressing": "正在壓縮...",
+        "success": "上下文已壓縮",
+        "successDesc": "{before} → {after} tokens",
+        "skipped": "跳過壓縮",
+        "skippedDesc": "訊息不足，無需壓縮",
+        "failed": "壓縮失敗"
+      },
       "status": {
         "connected": "AI 已連線",
         "notConfigured": "請先在全域設定中設定 AI 服務"
diff --git a/src/i18n/locales/zh-TW/settings.json b/src/i18n/locales/zh-TW/settings.json
index c8658ad8..97141a45 100644
--- a/src/i18n/locales/zh-TW/settings.json
+++ b/src/i18n/locales/zh-TW/settings.json
@@ -273,10 +273,6 @@
       "title": "傳送條數限制",
       "description": "每次提交給 AI 的最大訊息數，數值越大 Token 消耗越多，分析也更準確（新手建議2000）"
     },
-    "maxHistory": {
-      "title": "AI 上下文限制",
-      "description": "每次對話只保留最近幾輪內容（1 輪 = 使用者提問 + AI 回覆），避免上下文過長而消耗過多 Token"
-    },
     "searchContext": {
       "title": "搜尋上下文視窗",
       "description": "搜尋命中訊息時自動攜帶前後的對話上下文，幫助 AI 理解訊息背景。設為 0 則不攜帶上下文",
@@ -292,6 +288,17 @@
       "title": "SQL Lab 匯出格式",
       "description": "匯出 SQL 查詢結果時使用的檔案格式"
     },
+    "compression": {
+      "title": "上下文壓縮",
+      "enable": "自動壓縮上下文",
+      "enableDesc": "對話過長時自動壓縮歷史為摘要，保留關鍵資訊的同時降低 Token 消耗",
+      "threshold": "壓縮閾值",
+      "thresholdDesc": "上下文 Token 達到模型視窗的百分比時觸發壓縮",
+      "buffer": "緩衝區大小",
+      "bufferDesc": "保留最近訊息原文的比例，不參與壓縮",
+      "maxToolResultPercent": "工具結果上限",
+      "maxToolResultPercentDesc": "單次工具回傳給 AI 的最大資料量占比（基於模型上下文視窗計算），超出部分會被智慧截斷"
+    },
     "presets": {
       "title": "系統提示詞",
       "add": "新增預設",
diff --git a/src/stores/aiChat.ts b/src/stores/aiChat.ts
index 67096335..56416851 100644
--- a/src/stores/aiChat.ts
+++ b/src/stores/aiChat.ts
@@ -54,7 +54,7 @@ export type ContentBlock =
 // 消息类型
 export interface ChatMessage {
   id: string
-  role: 'user' | 'assistant'
+  role: 'user' | 'assistant' | 'summary'
   content: string
   timestamp: number
   dataSource?: {
@@ -97,6 +97,7 @@ interface ConversationBuffer {
   currentKeywords: string[]
   assistantId: string | null
   loaded: boolean
+  sessionTokenUsage?: TokenUsage
 }
 
 export interface AIChatSessionState {
@@ -284,12 +285,21 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
    * 这里只切换显示，不会影响后台正在推理的 buffer。
    */
   function bindDisplayedBuffer(state: AIChatSessionState, bufferKey: string): void {
+    // Save the outgoing conversation's token usage onto its buffer (as a copy) before switching the view
+    const currentKey = state.currentConversationId ?? DRAFT_CONVERSATION_KEY
+    const currentBuffer = state.conversationBuffers[currentKey]
+    if (currentBuffer) {
+      currentBuffer.sessionTokenUsage = { ...state.sessionTokenUsage }
+    }
+
     const buffer = getOrCreateBuffer(state, bufferKey)
     state.currentConversationId = bufferKey === DRAFT_CONVERSATION_KEY ? null : bufferKey
     state.messages = buffer.messages
     state.sourceMessages = buffer.sourceMessages
     state.currentKeywords = buffer.currentKeywords
     state.selectedAssistantId = buffer.assistantId
+    state.sessionTokenUsage = buffer.sessionTokenUsage ? { ...buffer.sessionTokenUsage } : createEmptyTokenUsage()
+    state.agentStatus = null // reset the agent status whenever the displayed buffer changes
   }
 
   function renameBufferKey(state: AIChatSessionState, fromKey: string, toKey: string): ConversationBuffer {
@@ -435,7 +445,10 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
       const buffer = getOrCreateBuffer(state, conversationId, conversation?.assistantId ?? null)
 
       if (!buffer.loaded) {
-        const history = await window.aiApi.getMessages(conversationId)
+        const [history, tokenUsage] = await Promise.all([
+          window.aiApi.getMessages(conversationId),
+          window.aiApi.getConversationTokenUsage(conversationId),
+        ])
         buffer.messages.splice(
           0,
           buffer.messages.length,
@@ -449,6 +462,7 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
         )
         buffer.sourceMessages.splice(0, buffer.sourceMessages.length)
         buffer.currentKeywords.splice(0, buffer.currentKeywords.length)
+        buffer.sessionTokenUsage = tokenUsage
         buffer.loaded = true
       }
 
@@ -566,6 +580,7 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
     const targetBuffer = getOrCreateBuffer(state, initialBufferKey, state.selectedAssistantId)
     // 在 try 外部声明，以便 catch 块能正确引用当前轮次的用户消息
     let currentUserMessage: ChatMessage | undefined
+    let lastDoneUsage: TokenUsage | undefined
 
     targetBuffer.assistantId = state.selectedAssistantId
     targetBuffer.loaded = true
@@ -753,7 +768,6 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
         updateActiveTaskConversationId(chatKey, conversation.id)
       }
 
-      const maxHistoryRounds = aiGlobalSettings.value.maxHistoryRounds ?? 5
       const preprocessConfig = settingsStore.aiPreprocessConfig
       const hasPreprocess =
         preprocessConfig.dataCleaning ||
@@ -852,6 +866,7 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
             case 'done':
               state.currentToolStatus = null
               if (chunk.usage) {
+                lastDoneUsage = { ...chunk.usage }
                 state.sessionTokenUsage = {
                   promptTokens: state.sessionTokenUsage.promptTokens + chunk.usage.promptTokens,
                   completionTokens: state.sessionTokenUsage.completionTokens + chunk.usage.completionTokens,
@@ -884,10 +899,16 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
         },
         state.chatType,
         state.locale,
-        maxHistoryRounds,
         currentAssistantId,
         currentSkillId,
-        !currentSkillId ? autoSkillEnabled : undefined
+        !currentSkillId ? autoSkillEnabled : undefined,
+        {
+          enabled: aiGlobalSettings.value.contextCompression?.enabled ?? false,
+          tokenThresholdPercent: aiGlobalSettings.value.contextCompression?.tokenThresholdPercent ?? 75,
+          bufferSizePercent: aiGlobalSettings.value.contextCompression?.bufferSizePercent ?? 20,
+          compressionModelConfigId: aiGlobalSettings.value.contextCompression?.compressionModelConfigId,
+          maxToolResultPercent: aiGlobalSettings.value.contextCompression?.maxToolResultPercent ?? 50,
+        }
       )
 
       state.currentAgentRequestId = agentReqId
@@ -914,7 +935,12 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
           isStreaming: false,
         }
 
-        await saveConversation(resolvedConversationId, userMessage, targetBuffer.messages[aiMessageIndex])
+        await saveConversation(
+          resolvedConversationId,
+          userMessage,
+          targetBuffer.messages[aiMessageIndex],
+          lastDoneUsage
+        )
       } else if (!hasStreamError) {
         const blocks = targetBuffer.messages[aiMessageIndex].contentBlocks || []
         blocks.push({
@@ -926,9 +952,19 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
           contentBlocks: [...blocks],
           isStreaming: false,
         }
-        await saveConversation(resolvedConversationId, userMessage, targetBuffer.messages[aiMessageIndex])
+        await saveConversation(
+          resolvedConversationId,
+          userMessage,
+          targetBuffer.messages[aiMessageIndex],
+          lastDoneUsage
+        )
       } else {
-        await saveConversation(resolvedConversationId, userMessage, targetBuffer.messages[aiMessageIndex])
+        await saveConversation(
+          resolvedConversationId,
+          userMessage,
+          targetBuffer.messages[aiMessageIndex],
+          lastDoneUsage
+        )
       }
 
       return { success: true }
@@ -951,7 +987,7 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
         // 优先使用当前轮次的用户消息，避免多轮对话取到第一条历史消息
         const userMsg = currentUserMessage || targetBuffer.messages.findLast((m) => m.role === 'user')
         if (userMsg) {
-          await saveConversation(resolvedConversationId, userMsg, lastMessage)
+          await saveConversation(resolvedConversationId, userMsg, lastMessage, lastDoneUsage)
         }
       }
 
@@ -970,7 +1006,8 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
   async function saveConversation(
     conversationId: string | null,
     userMsg: ChatMessage,
-    aiMsg: ChatMessage
+    aiMsg: ChatMessage,
+    tokenUsage?: TokenUsage
   ): Promise<void> {
     try {
       if (!conversationId) {
@@ -987,7 +1024,8 @@ export const useAIChatStore = defineStore('aiChatRuntime', () => {
         aiMsg.content,
         undefined,
         undefined,
-        serializableContentBlocks
+        serializableContentBlocks,
+        tokenUsage
       )
     } catch (error) {
       console.error('[AI] 保存对话失败：', error)
diff --git a/src/stores/prompt.ts b/src/stores/prompt.ts
index 20609c25..111b5ee1 100644
--- a/src/stores/prompt.ts
+++ b/src/stores/prompt.ts
@@ -11,12 +11,18 @@ export const usePromptStore = defineStore(
     const aiConfigVersion = ref(0)
     const aiGlobalSettings = ref({
       maxMessagesPerRequest: 1000,
-      maxHistoryRounds: 5,
       exportFormat: 'markdown' as 'markdown' | 'txt',
       sqlExportFormat: 'csv' as 'csv' | 'json',
       enableAutoSkill: true,
       searchContextBefore: 2,
       searchContextAfter: 2,
+      contextCompression: {
+        enabled: true, // auto-compress long conversation history into summaries
+        tokenThresholdPercent: 75, // % of the model context window at which compression triggers (per settings text)
+        bufferSizePercent: 20, // % of recent messages kept uncompressed (per settings text)
+        compressionModelConfigId: undefined as string | undefined, // NOTE(review): presumably the model config used for summarizing; confirm
+        maxToolResultPercent: 50, // max share of the context window for one tool result (per settings text)
+      },
     })
     const customKeywordTemplates = ref<KeywordTemplate[]>([])
     const deletedPresetTemplateIds = ref<string[]>([])
@@ -34,15 +40,29 @@ export const usePromptStore = defineStore(
     function updateAIGlobalSettings(
       settings: Partial<{
         maxMessagesPerRequest: number
-        maxHistoryRounds: number
         exportFormat: 'markdown' | 'txt'
         sqlExportFormat: 'csv' | 'json'
         enableAutoSkill: boolean
         searchContextBefore: number
         searchContextAfter: number
+        contextCompression: {
+          enabled: boolean
+          tokenThresholdPercent: number
+          bufferSizePercent: number
+          compressionModelConfigId?: string
+          maxToolResultPercent?: number
+        }
       }>
     ) {
-      aiGlobalSettings.value = { ...aiGlobalSettings.value, ...settings }
+      // Shallow-merge the patch, then merge contextCompression one level deeper so its untouched fields survive
+      const merged = { ...aiGlobalSettings.value, ...settings }
+      if (settings.contextCompression) {
+        merged.contextCompression = {
+          ...aiGlobalSettings.value.contextCompression,
+          ...settings.contextCompression,
+        }
+      }
+      aiGlobalSettings.value = merged
       notifyAIConfigChanged()
     }