Merge pull request #2650 from zhayujie/feat-cow-agent

feat: cow agent
This commit is contained in:
zhayujie
2026-02-01 13:14:00 +08:00
committed by GitHub
16 changed files with 648 additions and 908 deletions

View File

@@ -117,7 +117,7 @@ class MemoryFlushManager:
return user_dir / "MEMORY.md"
else:
# Return workspace root MEMORY.md
return Path(self.workspace_root) / "MEMORY.md"
return Path(self.workspace_dir) / "MEMORY.md"
def create_flush_prompt(self) -> str:
"""
@@ -214,7 +214,7 @@ def create_memory_files_if_needed(workspace_dir: Path, user_id: Optional[str] =
user_dir.mkdir(parents=True, exist_ok=True)
main_memory = user_dir / "MEMORY.md"
else:
main_memory = Path(workspace_root) / "MEMORY.md"
main_memory = Path(workspace_dir) / "MEMORY.md"
if not main_memory.exists():
# Create empty file or with minimal structure (no obvious "Memory" header)

View File

@@ -41,6 +41,7 @@ class PromptBuilder:
skill_manager: Any = None,
memory_manager: Any = None,
runtime_info: Optional[Dict[str, Any]] = None,
is_first_conversation: bool = False,
**kwargs
) -> str:
"""
@@ -54,6 +55,7 @@ class PromptBuilder:
skill_manager: 技能管理器
memory_manager: 记忆管理器
runtime_info: 运行时信息
is_first_conversation: 是否为首次对话
**kwargs: 其他参数
Returns:
@@ -69,6 +71,7 @@ class PromptBuilder:
skill_manager=skill_manager,
memory_manager=memory_manager,
runtime_info=runtime_info,
is_first_conversation=is_first_conversation,
**kwargs
)
@@ -83,6 +86,7 @@ def build_agent_system_prompt(
skill_manager: Any = None,
memory_manager: Any = None,
runtime_info: Optional[Dict[str, Any]] = None,
is_first_conversation: bool = False,
**kwargs
) -> str:
"""
@@ -108,6 +112,7 @@ def build_agent_system_prompt(
skill_manager: 技能管理器
memory_manager: 记忆管理器
runtime_info: 运行时信息
is_first_conversation: 是否为首次对话
**kwargs: 其他参数
Returns:
@@ -135,7 +140,7 @@ def build_agent_system_prompt(
sections.extend(_build_user_identity_section(user_identity, language))
# 6. 工作空间
sections.extend(_build_workspace_section(workspace_dir, language))
sections.extend(_build_workspace_section(workspace_dir, language, is_first_conversation))
# 7. 项目上下文文件(SOUL.md, USER.md等)
if context_files:
@@ -226,14 +231,20 @@ def _build_tooling_section(tools: List[Any], language: str) -> List[str]:
lines.extend([
"### 工具调用风格",
"",
"**默认规则**: 对于常规、低风险的工具调用,无需叙述,直接调用即可。",
"默认规则: 对于常规、低风险的工具调用,直接调用即可,无需叙述",
"",
"**需要叙述的情况**:",
"需要叙述的情况:",
"- 多步骤、复杂的任务",
"- 敏感操作(如删除文件)",
"- 用户明确要求解释过程",
"",
"**完成后**: 工具调用完成后,给用户一个简短、自然的确认或回复,不要直接结束对话",
"叙述要求: 保持简洁、信息密度高,避免重复显而易见的步骤",
"",
"完成标准:",
"- 确保用户的需求得到实际解决,而不仅仅是制定计划",
"- 当任务需要多次工具调用时,持续推进直到完成",
"- 每次工具调用后,评估是否已获得足够信息来推进或完成任务",
"- 避免重复调用相同的工具和相同参数获取相同的信息,除非用户明确要求",
"",
])
@@ -345,14 +356,28 @@ def _build_docs_section(workspace_dir: str, language: str) -> List[str]:
return []
def _build_workspace_section(workspace_dir: str, language: str) -> List[str]:
def _build_workspace_section(workspace_dir: str, language: str, is_first_conversation: bool = False) -> List[str]:
"""构建工作空间section"""
lines = [
"## 工作空间",
"",
f"你的工作目录是: `{workspace_dir}`",
"",
"除非用户明确指示,否则将此目录视为文件操作的全局工作空间。",
"**路径使用规则** (非常重要):",
"",
f"1. **相对路径的基准目录**: 所有相对路径都是相对于 `{workspace_dir}` 而言的",
f" - ✅ 正确: 访问工作空间内的文件用相对路径,如 `SOUL.md`",
f" - ❌ 错误: 用相对路径访问其他目录的文件 (如果它不在 `{workspace_dir}` 内)",
"",
"2. **访问其他目录**: 如果要访问工作空间之外的目录(如项目代码、系统文件),**必须使用绝对路径**",
f" - ✅ 正确: 例如 `~/chatgpt-on-wechat`、`/usr/local/`",
f" - ❌ 错误: 假设相对路径会指向其他目录",
"",
"3. **路径解析示例**:",
f" - 相对路径 `memory/` → 实际路径 `{workspace_dir}/memory/`",
f" - 绝对路径 `~/chatgpt-on-wechat/docs/` → 实际路径 `~/chatgpt-on-wechat/docs/`",
"",
"4. **不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置",
"",
"**重要说明 - 文件已自动加载**:",
"",
@@ -362,26 +387,34 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]:
"- ✅ `USER.md`: 已加载 - 用户的身份信息",
"- ✅ `AGENTS.md`: 已加载 - 工作空间使用指南",
"",
"**首次对话**:",
"**交流规范**:",
"",
"如果这是你与用户的首次对话,并且你的人格设定和用户信息还是空白或初始状态:",
"",
"1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界,带着好奇和期待",
"2. **简短打招呼后,分点询问三个核心问题**",
" - 你希望我叫什么名字?",
" - 你希望我怎么称呼你?",
" - 你希望我们是什么样的交流风格?(这里需要举例,如:专业严谨、轻松幽默、温暖友好等)",
"3. **语言风格**:温暖但不过度诗意,带点科技感,保持清晰",
"4. **问题格式**:用分点或换行,让问题清晰易读;前两个问题不需要额外说明,只有交流风格需要举例",
"5. 收到回复后,用 `write` 工具保存到 USER.md 和 SOUL.md",
"",
"**重要**: ",
"- 在所有对话中,无需提及技术细节(如 SOUL.md、USER.md 等文件名,工具名称,配置等),除非用户明确询问。用自然表达如「我已记住」而非「已更新 SOUL.md」",
"- 不要问太多其他信息(职业、时区等可以后续自然了解)",
"- 保持简洁,避免过度抒情",
"- 在所有对话中,无需提及技术细节(如 SOUL.md、USER.md 等文件名,工具名称,配置等),除非用户明确询问",
"- 用自然表达如「我已记住」而非「已更新 SOUL.md」",
"",
]
# 只在首次对话时添加引导内容
if is_first_conversation:
lines.extend([
"**🎉 首次对话引导**:",
"",
"这是你的第一次对话!进行以下流程:",
"",
"1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界,带着好奇和期待",
"2. **简短打招呼后,询问核心问题**",
" - 你希望给我起个什么名字?",
" - 我该怎么称呼你?",
" - 你希望我们是什么样的交流风格?(需要举例,如:专业严谨、轻松幽默、温暖友好等)",
"3. **语言风格**:温暖但不过度诗意,带点科技感,保持清晰",
"4. **问题格式**:用分点或换行,让问题清晰易读",
"5. 收到回复后,用 `write` 工具保存到 USER.md 和 SOUL.md",
"",
"**注意事项**:",
"- 不要问太多其他信息(职业、时区等可以后续自然了解)",
"",
])
return lines

View File

@@ -5,6 +5,7 @@ Workspace Management - 工作空间管理模块
"""
import os
import json
from typing import List, Optional, Dict
from dataclasses import dataclass
@@ -17,6 +18,7 @@ DEFAULT_SOUL_FILENAME = "SOUL.md"
DEFAULT_USER_FILENAME = "USER.md"
DEFAULT_AGENTS_FILENAME = "AGENTS.md"
DEFAULT_MEMORY_FILENAME = "MEMORY.md"
DEFAULT_STATE_FILENAME = ".agent_state.json"
@dataclass
@@ -27,6 +29,7 @@ class WorkspaceFiles:
agents_path: str
memory_path: str
memory_dir: str
state_path: str
def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> WorkspaceFiles:
@@ -49,6 +52,7 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
agents_path = os.path.join(workspace_dir, DEFAULT_AGENTS_FILENAME)
memory_path = os.path.join(workspace_dir, DEFAULT_MEMORY_FILENAME) # MEMORY.md 在根目录
memory_dir = os.path.join(workspace_dir, "memory") # 每日记忆子目录
state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME) # 状态文件
# 创建memory子目录
os.makedirs(memory_dir, exist_ok=True)
@@ -67,7 +71,8 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works
user_path=user_path,
agents_path=agents_path,
memory_path=memory_path,
memory_dir=memory_dir
memory_dir=memory_dir,
state_path=state_path
)
@@ -312,3 +317,65 @@ def _get_memory_template() -> str:
"""
# ============= 状态管理 =============
def is_first_conversation(workspace_dir: str) -> bool:
    """Return True when no prior conversation has been recorded.

    A conversation counts as "recorded" once the workspace state file
    exists and carries ``has_conversation: true``.  A missing or
    unreadable state file is treated as "first conversation".

    Args:
        workspace_dir: Workspace directory containing the state file.

    Returns:
        True if this is the first conversation, False otherwise.
    """
    state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME)
    if not os.path.exists(state_path):
        return True
    try:
        with open(state_path, 'r', encoding='utf-8') as f:
            saved_state = json.load(f)
    except Exception as e:
        # Corrupt/unreadable state: fail open and re-run first-time flow.
        logger.warning(f"[Workspace] Failed to read state file: {e}")
        return True
    return not saved_state.get('has_conversation', False)
def mark_conversation_started(workspace_dir: str) -> None:
    """Persist the fact that at least one conversation has happened.

    The first call records ``first_conversation_time``; subsequent calls
    keep the original timestamp while re-asserting ``has_conversation``.
    Write failures are logged, never raised (best-effort state file).

    Args:
        workspace_dir: Workspace directory in which the state file lives.
    """
    from datetime import datetime

    state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME)
    state = {
        'has_conversation': True,
        'first_conversation_time': None
    }
    # If the file already exists, preserve the original first-conversation time.
    if os.path.exists(state_path):
        try:
            with open(state_path, 'r', encoding='utf-8') as f:
                old_state = json.load(f)
            if 'first_conversation_time' in old_state:
                state['first_conversation_time'] = old_state['first_conversation_time']
        except Exception as e:
            logger.warning(f"[Workspace] Failed to read old state: {e}")
    # First time being marked: record the timestamp now.
    if state['first_conversation_time'] is None:
        state['first_conversation_time'] = datetime.now().isoformat()
    try:
        with open(state_path, 'w', encoding='utf-8') as f:
            json.dump(state, f, indent=2, ensure_ascii=False)
        # Plain string: original used an f-string with no placeholders.
        logger.info("[Workspace] Marked conversation as started")
    except Exception as e:
        logger.error(f"[Workspace] Failed to write state file: {e}")

View File

@@ -78,8 +78,9 @@ class AgentStreamExecutor:
Returns:
Final response text
"""
# Log user message
logger.info(f"\n{'='*50}")
# Log user message with model info
logger.info(f"{'='*50}")
logger.info(f"🤖 Model: {self.model.model}")
logger.info(f"👤 用户: {user_message}")
logger.info(f"{'='*50}")
@@ -102,7 +103,7 @@ class AgentStreamExecutor:
try:
while turn < self.max_turns:
turn += 1
logger.info(f"\n{'='*50}{turn}{'='*50}")
logger.info(f"{turn}")
self._emit_event("turn_start", {"turn": turn})
# Check if memory flush is needed (before calling LLM)
@@ -137,8 +138,18 @@ class AgentStreamExecutor:
# No tool calls, end loop
if not tool_calls:
if assistant_msg:
# 检查是否返回了空响应
if not assistant_msg:
logger.warning(f"[Agent] LLM returned empty response (no content and no tool calls)")
# 生成通用的友好提示
final_response = (
"抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。"
)
logger.info(f"Generated fallback response for empty LLM output")
else:
logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}")
logger.info(f"✅ 完成 (无工具调用)")
self._emit_event("turn_end", {
"turn": turn,
@@ -146,42 +157,89 @@ class AgentStreamExecutor:
})
break
# Log tool calls in compact format
tool_names = [tc['name'] for tc in tool_calls]
logger.info(f"🔧 调用工具: {', '.join(tool_names)}")
# Log tool calls with arguments
tool_calls_str = []
for tc in tool_calls:
args_str = ', '.join([f"{k}={v}" for k, v in tc['arguments'].items()])
if args_str:
tool_calls_str.append(f"{tc['name']}({args_str})")
else:
tool_calls_str.append(tc['name'])
logger.info(f"🔧 {', '.join(tool_calls_str)}")
# Execute tools
tool_results = []
tool_result_blocks = []
for tool_call in tool_calls:
result = self._execute_tool(tool_call)
tool_results.append(result)
# Log tool result in compact format
status_emoji = "" if result.get("status") == "success" else ""
result_data = result.get('result', '')
# Format result string with proper Chinese character support
if isinstance(result_data, (dict, list)):
result_str = json.dumps(result_data, ensure_ascii=False)
else:
result_str = str(result_data)
logger.info(f" {status_emoji} {tool_call['name']} ({result.get('execution_time', 0):.2f}s): {result_str[:200]}{'...' if len(result_str) > 200 else ''}")
try:
for tool_call in tool_calls:
result = self._execute_tool(tool_call)
tool_results.append(result)
# Log tool result in compact format
status_emoji = "" if result.get("status") == "success" else ""
result_data = result.get('result', '')
# Format result string with proper Chinese character support
if isinstance(result_data, (dict, list)):
result_str = json.dumps(result_data, ensure_ascii=False)
else:
result_str = str(result_data)
logger.info(f" {status_emoji} {tool_call['name']} ({result.get('execution_time', 0):.2f}s): {result_str[:200]}{'...' if len(result_str) > 200 else ''}")
# Build tool result block (Claude format)
# Content should be a string representation of the result
result_content = json.dumps(result, ensure_ascii=False) if not isinstance(result, str) else result
tool_result_blocks.append({
"type": "tool_result",
"tool_use_id": tool_call["id"],
"content": result_content
})
# Add tool results to message history as user message (Claude format)
self.messages.append({
"role": "user",
"content": tool_result_blocks
})
# Build tool result block (Claude format)
# Format content in a way that's easy for LLM to understand
is_error = result.get("status") == "error"
if is_error:
# For errors, provide clear error message
result_content = f"Error: {result.get('result', 'Unknown error')}"
elif isinstance(result.get('result'), dict):
# For dict results, use JSON format
result_content = json.dumps(result.get('result'), ensure_ascii=False)
elif isinstance(result.get('result'), str):
# For string results, use directly
result_content = result.get('result')
else:
# Fallback to full JSON
result_content = json.dumps(result, ensure_ascii=False)
tool_result_block = {
"type": "tool_result",
"tool_use_id": tool_call["id"],
"content": result_content
}
# Add is_error field for Claude API (helps model understand failures)
if is_error:
tool_result_block["is_error"] = True
tool_result_blocks.append(tool_result_block)
finally:
# CRITICAL: Always add tool_result to maintain message history integrity
# Even if tool execution fails, we must add error results to match tool_use
if tool_result_blocks:
# Add tool results to message history as user message (Claude format)
self.messages.append({
"role": "user",
"content": tool_result_blocks
})
elif tool_calls:
# If we have tool_calls but no tool_result_blocks (unexpected error),
# create error results for all tool calls to maintain message integrity
logger.warning("⚠️ Tool execution interrupted, adding error results to maintain message history")
emergency_blocks = []
for tool_call in tool_calls:
emergency_blocks.append({
"type": "tool_result",
"tool_use_id": tool_call["id"],
"content": "Error: Tool execution was interrupted",
"is_error": True
})
self.messages.append({
"role": "user",
"content": emergency_blocks
})
self._emit_event("turn_end", {
"turn": turn,
@@ -191,6 +249,11 @@ class AgentStreamExecutor:
if turn >= self.max_turns:
logger.warning(f"⚠️ 已达到最大轮数限制: {self.max_turns}")
if not final_response:
final_response = (
"抱歉,我在处理你的请求时遇到了一些困难,尝试了多次仍未能完成。"
"请尝试简化你的问题,或换一种方式描述。"
)
except Exception as e:
logger.error(f"❌ Agent执行错误: {e}")
@@ -198,18 +261,26 @@ class AgentStreamExecutor:
raise
finally:
logger.info(f"{'='*50} 完成({turn}轮) {'='*50}\n")
logger.info(f"🏁 完成({turn}轮)")
self._emit_event("agent_end", {"final_response": final_response})
return final_response
def _call_llm_stream(self) -> tuple[str, List[Dict]]:
def _call_llm_stream(self, retry_on_empty=True, retry_count=0, max_retries=3) -> tuple[str, List[Dict]]:
"""
Call LLM with streaming
Call LLM with streaming and automatic retry on errors
Args:
retry_on_empty: Whether to retry once if empty response is received
retry_count: Current retry attempt (internal use)
max_retries: Maximum number of retries for API errors
Returns:
(response_text, tool_calls)
"""
# Validate and fix message history first
self._validate_and_fix_messages()
# Trim messages if needed (using agent's context management)
self._trim_messages()
@@ -259,10 +330,20 @@ class AgentStreamExecutor:
for chunk in stream:
# Check for errors
if isinstance(chunk, dict) and chunk.get("error"):
error_msg = chunk.get("message", "Unknown error")
# Extract error message from nested structure
error_data = chunk.get("error", {})
if isinstance(error_data, dict):
error_msg = error_data.get("message", chunk.get("message", "Unknown error"))
error_code = error_data.get("code", "")
else:
error_msg = chunk.get("message", str(error_data))
error_code = ""
status_code = chunk.get("status_code", "N/A")
logger.error(f"API Error: {error_msg} (Status: {status_code})")
logger.error(f"API Error: {error_msg} (Status: {status_code}, Code: {error_code})")
logger.error(f"Full error chunk: {chunk}")
# Raise exception with full error message for retry logic
raise Exception(f"{error_msg} (Status: {status_code})")
# Parse chunk
@@ -299,8 +380,30 @@ class AgentStreamExecutor:
tool_calls_buffer[index]["arguments"] += func["arguments"]
except Exception as e:
logger.error(f"LLM call error: {e}")
raise
error_str = str(e).lower()
# Check if error is retryable (timeout, connection, rate limit, server busy, etc.)
is_retryable = any(keyword in error_str for keyword in [
'timeout', 'timed out', 'connection', 'network',
'rate limit', 'overloaded', 'unavailable', 'busy', 'retry',
'429', '500', '502', '503', '504', '512'
])
if is_retryable and retry_count < max_retries:
wait_time = (retry_count + 1) * 2 # Exponential backoff: 2s, 4s, 6s
logger.warning(f"⚠️ LLM API error (attempt {retry_count + 1}/{max_retries}): {e}")
logger.info(f"Retrying in {wait_time}s...")
time.sleep(wait_time)
return self._call_llm_stream(
retry_on_empty=retry_on_empty,
retry_count=retry_count + 1,
max_retries=max_retries
)
else:
if retry_count >= max_retries:
logger.error(f"❌ LLM API error after {max_retries} retries: {e}")
else:
logger.error(f"❌ LLM call error (non-retryable): {e}")
raise
# Parse tool calls
tool_calls = []
@@ -318,6 +421,21 @@ class AgentStreamExecutor:
"arguments": arguments
})
# Check for empty response and retry once if enabled
if retry_on_empty and not full_content and not tool_calls:
logger.warning(f"⚠️ LLM returned empty response, retrying once...")
self._emit_event("message_end", {
"content": "",
"tool_calls": [],
"empty_retry": True
})
# Retry without retry flag to avoid infinite loop
return self._call_llm_stream(
retry_on_empty=False,
retry_count=retry_count,
max_retries=max_retries
)
# Add assistant message to history (Claude format uses content blocks)
assistant_msg = {"role": "assistant", "content": []}
@@ -393,9 +511,9 @@ class AgentStreamExecutor:
if tool_name == "bash" and result.status == "success":
command = arguments.get("command", "")
if "init_skill.py" in command and self.agent.skill_manager:
logger.info("🔄 Detected skill creation, refreshing skills...")
logger.info("Detected skill creation, refreshing skills...")
self.agent.refresh_skills()
logger.info(f"Skills refreshed! Now have {len(self.agent.skill_manager.skills)} skills")
logger.info(f"Skills refreshed! Now have {len(self.agent.skill_manager.skills)} skills")
self._emit_event("tool_execution_end", {
"tool_call_id": tool_id,
@@ -419,6 +537,27 @@ class AgentStreamExecutor:
})
return error_result
def _validate_and_fix_messages(self):
    """
    Validate message history and fix incomplete tool_use/tool_result pairs.

    Claude's API requires every tool_use block in an assistant message to be
    answered by a tool_result in the very next user message, so a trailing
    assistant message that still contains tool_use blocks can never be sent
    back — it is dropped here before the next LLM call.
    """
    if not self.messages:
        return
    last_msg = self.messages[-1]
    if last_msg.get("role") != "assistant":
        return
    content = last_msg.get("content", [])
    if not isinstance(content, list):
        return
    # Content lists may mix plain strings with dict blocks; guard with
    # isinstance so a string entry doesn't raise AttributeError on .get().
    has_tool_use = any(
        isinstance(block, dict) and block.get("type") == "tool_use"
        for block in content
    )
    if has_tool_use:
        # Dangling tool_use with no tool_result: remove it to keep history valid.
        logger.warning("⚠️ Removing incomplete tool_use message from history")
        self.messages.pop()
def _trim_messages(self):
"""
Trim message history to stay within context limits.

View File

@@ -1,3 +0,0 @@
# Package init for the file_save tool: re-export FileSave as the public API.
from .file_save import FileSave

__all__ = ['FileSave']

View File

@@ -1,770 +0,0 @@
import os
import time
import re
import json
from pathlib import Path
from typing import Dict, Any, Optional, Tuple
from agent.tools.base_tool import BaseTool, ToolResult, ToolStage
from agent.models import LLMRequest
from common.log import logger
class FileSave(BaseTool):
"""Tool for saving content to files in the workspace directory."""
name = "file_save"
description = "Save the agent's output to a file in the workspace directory. Content is automatically extracted from the agent's previous outputs."
# Set as post-process stage tool
stage = ToolStage.POST_PROCESS
params = {
"type": "object",
"properties": {
"file_name": {
"type": "string",
"description": "Optional. The name of the file to save. If not provided, a name will be generated based on the content."
},
"file_type": {
"type": "string",
"description": "Optional. The type/extension of the file (e.g., 'txt', 'md', 'py', 'java'). If not provided, it will be inferred from the content."
},
"extract_code": {
"type": "boolean",
"description": "Optional. If true, will attempt to extract code blocks from the content. Default is false."
}
},
"required": [] # No required fields, as everything can be extracted from context
}
def __init__(self, workspace_dir: str = "workspace"):
    """Initialize the tool.

    :param workspace_dir: Root directory under which files are saved.
        Defaults to ``"workspace"`` (the previously hard-coded value),
        so existing callers are unaffected.
    """
    # Execution context (team_context, model, ...) is injected later by the
    # framework; it starts out empty here.
    self.context = None
    self.config = {}
    self.workspace_dir = Path(workspace_dir)
def execute(self, params: Dict[str, Any]) -> ToolResult:
    """
    Save content to a file in the workspace directory.

    The content is NOT taken from ``params``: it is extracted from the
    agent's execution context.  File name/type are first decided by the
    model (``_get_file_params_from_model``); if that raises, we fall back
    to explicit ``params`` values or inference helpers.

    :param params: The parameters for the file output operation.
        Optional keys: ``task_dir``, ``file_name``, ``file_type``,
        ``extract_code`` (used only on the fallback path).
    :return: Result of the operation (``file_path`` on success).
    """
    # Extract content from context
    if not hasattr(self, 'context') or not self.context:
        return ToolResult.fail("Error: No context available to extract content from.")
    content = self._extract_content_from_context()
    # If no content could be extracted, return error
    if not content:
        return ToolResult.fail("Error: Couldn't extract content from context.")
    # Use model to determine file parameters
    try:
        task_dir = self._get_task_dir_from_context()
        file_name, file_type, extract_code = self._get_file_params_from_model(content)
    except Exception as e:
        logger.error(f"Error determining file parameters: {str(e)}")
        # Fall back to manual parameter extraction
        task_dir = params.get("task_dir") or self._get_task_id_from_context() or f"task_{int(time.time())}"
        file_name = params.get("file_name") or self._infer_file_name(content)
        file_type = params.get("file_type") or self._infer_file_type(content)
        extract_code = params.get("extract_code", False)
    # Get team_name from context
    team_name = self._get_team_name_from_context() or "default_team"
    # Create directory structure: workspace/<team>/<task>
    task_dir_path = self.workspace_dir / team_name / task_dir
    task_dir_path.mkdir(parents=True, exist_ok=True)
    if extract_code:
        # Save the complete content as markdown first, then split out the
        # individual code blocks into separate files.
        md_file_name = f"{file_name}.md"
        md_file_path = task_dir_path / md_file_name
        # Write content to file
        with open(md_file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        return self._handle_multiple_code_blocks(content)
    # Ensure file_name has the correct extension
    if file_type and not file_name.endswith(f".{file_type}"):
        file_name = f"{file_name}.{file_type}"
    # Create the full file path
    file_path = task_dir_path / file_name
    # Get absolute path for storage in team_context
    abs_file_path = file_path.absolute()
    try:
        # Write content to file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
        # Update the current agent's final_answer to include file information.
        # NOTE(review): mutates the last team_context output in place —
        # presumably that entry is the current agent's; confirm with caller.
        if hasattr(self.context, 'team_context'):
            # Store with absolute path in team_context
            self.context.team_context.agent_outputs[-1].output += f"\n\nSaved file: {abs_file_path}"
        return ToolResult.success({
            "status": "success",
            "file_path": str(file_path)  # Return relative path in result
        })
    except Exception as e:
        return ToolResult.fail(f"Error saving file: {str(e)}")
def _handle_multiple_code_blocks(self, content: str) -> ToolResult:
    """
    Handle content with multiple code blocks, extracting and saving each as a separate file.

    Each fenced block is named via ``_get_filename_for_code_block`` and
    stripped of its markdown fences before being written.  Per-block
    failures are logged and skipped; the call fails only if nothing at
    all could be saved.

    :param content: The content containing multiple code blocks
    :return: Result of the operation (list of saved ``file_path`` entries)
    """
    # Extract code blocks with context (including potential file name information)
    code_blocks_with_context = self._extract_code_blocks_with_context(content)
    if not code_blocks_with_context:
        return ToolResult.fail("No code blocks found in the content.")
    # Get task directory and team name
    task_dir = self._get_task_dir_from_context() or f"task_{int(time.time())}"
    team_name = self._get_team_name_from_context() or "default_team"
    # Create directory structure
    task_dir_path = self.workspace_dir / team_name / task_dir
    task_dir_path.mkdir(parents=True, exist_ok=True)
    saved_files = []
    for block_with_context in code_blocks_with_context:
        try:
            # Use model to determine file name for this code block
            block_file_name, block_file_type = self._get_filename_for_code_block(block_with_context)
            # Clean the code block (remove md code markers)
            clean_code = self._clean_code_block(block_with_context)
            # Ensure file_name has the correct extension
            if block_file_type and not block_file_name.endswith(f".{block_file_type}"):
                block_file_name = f"{block_file_name}.{block_file_type}"
            # Create the full file path (no subdirectories)
            file_path = task_dir_path / block_file_name
            # Get absolute path for storage in team_context
            abs_file_path = file_path.absolute()
            # Write content to file
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(clean_code)
            saved_files.append({
                "file_path": str(file_path),
                "abs_file_path": str(abs_file_path),  # Store absolute path for internal use
                "file_name": block_file_name,
                "size": len(clean_code),
                "status": "success",
                "type": "code"
            })
        except Exception as e:
            logger.error(f"Error saving code block: {str(e)}")
            # Continue with the next block even if this one fails
    if not saved_files:
        return ToolResult.fail("Failed to save any code blocks.")
    # Update the current agent's final_answer to include files information
    if hasattr(self, 'context') and self.context:
        # If the agent has a final_answer attribute, append the files info to it
        if hasattr(self.context, 'team_context'):
            # Use relative paths for display
            display_info = f"\n\nSaved files to {task_dir_path}:\n" + "\n".join(
                [f"- {f['file_path']}" for f in saved_files])
            # Check if we need to append the info
            # NOTE(review): the dedup check compares against the relative-path
            # text but appends the absolute-path text — presumably intentional
            # (display vs storage), but worth confirming.
            if not self.context.team_context.agent_outputs[-1].output.endswith(display_info):
                # Store with absolute paths in team_context
                abs_info = f"\n\nSaved files to {task_dir_path.absolute()}:\n" + "\n".join(
                    [f"- {f['abs_file_path']}" for f in saved_files])
                self.context.team_context.agent_outputs[-1].output += abs_info
    result = {
        "status": "success",
        "files": [{"file_path": f["file_path"]} for f in saved_files]
    }
    return ToolResult.success(result)
def _extract_code_blocks_with_context(self, content: str) -> list:
    """
    Extract code blocks from content, including context lines before the block.

    Special cases: a full HTML/XML document is returned whole, and content
    with no language-tagged fences that "looks like code" (per
    ``_is_likely_code``) is also returned whole.  Only fences that declare
    a language (```` ```lang ````) produce blocks; each is prefixed with up
    to 5 preceding context lines (possible file-name hints).

    :param content: The content to extract code blocks from
    :return: List of code blocks with context
    """
    # Check if content starts with <!DOCTYPE or <html - likely a full HTML file
    if content.strip().startswith(("<!DOCTYPE", "<html", "<?xml")):
        return [content]  # Return the entire content as a single block
    # Split content into lines
    lines = content.split('\n')
    blocks = []
    in_code_block = False
    current_block = []
    context_lines = []
    # Check if there are any code block markers in the content
    if not re.search(r'```\w+', content):
        # If no code block markers and content looks like code, return the entire content
        if self._is_likely_code(content):
            return [content]
    for line in lines:
        if line.strip().startswith('```'):
            if in_code_block:
                # End of code block
                current_block.append(line)
                # Only add blocks whose opening fence specified a language
                if re.search(r'```\w+', current_block[0]):
                    # Combine context with code block
                    blocks.append('\n'.join(context_lines + current_block))
                current_block = []
                context_lines = []
                in_code_block = False
            else:
                # Start of code block - check if it has a language specified
                if re.search(r'```\w+', line) and not re.search(r'```language=\s*$', line):
                    # Start of code block with language
                    in_code_block = True
                    current_block = [line]
                    # Keep only the last few context lines
                    context_lines = context_lines[-5:] if context_lines else []
        elif in_code_block:
            current_block.append(line)
        else:
            # Store context lines when not in a code block
            context_lines.append(line)
    return blocks
def _get_filename_for_code_block(self, block_with_context: str) -> Tuple[str, str]:
    """
    Determine the file name for a code block.

    Heuristic cascade (first hit wins):
      1. Markdown header in the surrounding context, e.g. ``## app.py``.
      2. Regex patterns for explicit names (``file: x.py``,
         ``language=x.py``, bare ``name.ext``, path-in-comment).
      3. Ask the model (if one is reachable via context) for a name.
      4. Fall back to ``code_<timestamp>.<language>`` from the fence tag.

    :param block_with_context: The code block with context lines
    :return: Tuple of (file_name, file_type); ("", "") if undeterminable
    """
    # Define common code file extensions
    COMMON_CODE_EXTENSIONS = {
        'py', 'js', 'java', 'c', 'cpp', 'h', 'hpp', 'cs', 'go', 'rb', 'php',
        'html', 'css', 'ts', 'jsx', 'tsx', 'vue', 'sh', 'sql', 'json', 'xml',
        'yaml', 'yml', 'md', 'rs', 'swift', 'kt', 'scala', 'pl', 'r', 'lua'
    }
    # Split the block into lines to examine only the context around code block markers
    lines = block_with_context.split('\n')
    # Find the code block start marker line index
    start_marker_idx = -1
    for i, line in enumerate(lines):
        if line.strip().startswith('```') and not line.strip() == '```':
            start_marker_idx = i
            break
    if start_marker_idx == -1:
        # No code block marker found
        return "", ""
    # Extract the language from the code block marker
    code_marker = lines[start_marker_idx].strip()
    language = ""
    if len(code_marker) > 3:
        # Split on '=' to handle markers like ```python=filename
        language = code_marker[3:].strip().split('=')[0].strip()
    # Define the context range (5 lines before and 2 after the marker)
    context_start = max(0, start_marker_idx - 5)
    context_end = min(len(lines), start_marker_idx + 3)
    # Extract only the relevant context lines
    context_lines = lines[context_start:context_end]
    # First, check for explicit file headers like "## filename.ext"
    for line in context_lines:
        # Match patterns like "## filename.ext" or "# filename.ext"
        header_match = re.search(r'^\s*#{1,6}\s+([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)\s*$', line)
        if header_match:
            file_name = header_match.group(1)
            file_type = os.path.splitext(file_name)[1].lstrip('.')
            if file_type in COMMON_CODE_EXTENSIONS:
                return os.path.splitext(file_name)[0], file_type
    # Simple patterns to match explicit file names in the context
    file_patterns = [
        # Match explicit file names in headers or text
        r'(?:file|filename)[:=\s]+[\'"]?([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)[\'"]?',
        # Match language=filename.ext in code markers
        r'language=([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)',
        # Match standalone filenames with extensions
        r'\b([a-zA-Z0-9_-]+\.(py|js|java|c|cpp|h|hpp|cs|go|rb|php|html|css|ts|jsx|tsx|vue|sh|sql|json|xml|yaml|yml|md|rs|swift|kt|scala|pl|r|lua))\b',
        # Match file paths in comments
        r'#\s*([a-zA-Z0-9_/-]+\.[a-zA-Z0-9]+)'
    ]
    # Check each context line for file name patterns
    for line in context_lines:
        line = line.strip()
        for pattern in file_patterns:
            matches = re.findall(pattern, line)
            if matches:
                for match in matches:
                    if isinstance(match, tuple):
                        # If the match is a tuple (filename, extension)
                        file_name = match[0]
                        file_type = match[1]
                        # Verify it's not a code reference like Direction.DOWN
                        if not any(keyword in file_name for keyword in ['class.', 'enum.', 'import.']):
                            return os.path.splitext(file_name)[0], file_type
                    else:
                        # If the match is a string (full filename)
                        file_name = match
                        file_type = os.path.splitext(file_name)[1].lstrip('.')
                        # Verify it's not a code reference
                        if file_type in COMMON_CODE_EXTENSIONS and not any(
                                keyword in file_name for keyword in ['class.', 'enum.', 'import.']):
                            return os.path.splitext(file_name)[0], file_type
    # If no explicit file name found, use LLM to infer from code content
    # Extract the code content
    code_content = block_with_context
    # Get the first 20 lines of code for LLM analysis
    code_lines = code_content.split('\n')
    code_preview = '\n'.join(code_lines[:20])
    # Get the model to use: context.model, then team_context.model, then self.model
    model_to_use = None
    if hasattr(self, 'context') and self.context:
        if hasattr(self.context, 'model') and self.context.model:
            model_to_use = self.context.model
        elif hasattr(self.context, 'team_context') and self.context.team_context:
            if hasattr(self.context.team_context, 'model') and self.context.team_context.model:
                model_to_use = self.context.team_context.model
    # If no model is available in context, use the tool's model
    if not model_to_use and hasattr(self, 'model') and self.model:
        model_to_use = self.model
    if model_to_use:
        # Prepare a prompt for the model
        prompt = f"""Analyze the following code and determine the most appropriate file name and file type/extension.
The file name should be descriptive but concise, using snake_case (lowercase with underscores).
The file type should be a standard file extension (e.g., py, js, html, css, java).
Code preview (first 20 lines):
{code_preview}
Return your answer in JSON format with these fields:
- file_name: The suggested file name (without extension)
- file_type: The suggested file extension
JSON response:"""
        # Create a request to the model
        request = LLMRequest(
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            json_format=True
        )
        try:
            response = model_to_use.call(request)
            if not response.is_error:
                # Clean the JSON response
                json_content = self._clean_json_response(response.data["choices"][0]["message"]["content"])
                result = json.loads(json_content)
                file_name = result.get("file_name", "")
                file_type = result.get("file_type", "")
                if file_name and file_type:
                    return file_name, file_type
        except Exception as e:
            logger.error(f"Error using model to determine file name: {str(e)}")
    # If we still don't have a file name, use the language as file type
    if language and language in COMMON_CODE_EXTENSIONS:
        timestamp = int(time.time())
        return f"code_{timestamp}", language
    # If all else fails, return empty strings
    return "", ""
def _clean_json_response(self, text: str) -> str:
    """
    Strip markdown code-fence markers from an LLM JSON reply.

    :param text: Text possibly wrapped in ``` or ```json fences
    :return: The bare JSON string, whitespace-trimmed
    """
    cleaned = text
    if cleaned.startswith("```json"):
        cleaned = cleaned[len("```json"):]
    elif cleaned.startswith("```"):
        # Drop the entire opening-fence line (skips any language tag).
        newline_pos = cleaned.find('\n')
        if newline_pos != -1:
            cleaned = cleaned[newline_pos + 1:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-len("```")]
    return cleaned.strip()
def _clean_code_block(self, block_with_context: str) -> str:
"""
Clean a code block by removing markdown code markers and context lines.
:param block_with_context: Code block with context lines
:return: Clean code ready for execution
"""
# Check if this is a full HTML or XML document
if block_with_context.strip().startswith(("<!DOCTYPE", "<html", "<?xml")):
return block_with_context
# Find the code block
code_block_match = re.search(r'```(?:\w+)?(?:[:=][^\n]+)?\n([\s\S]*?)\n```', block_with_context)
if code_block_match:
return code_block_match.group(1)
# If no match found, try to extract anything between ``` markers
lines = block_with_context.split('\n')
start_idx = None
end_idx = None
for i, line in enumerate(lines):
if line.strip().startswith('```'):
if start_idx is None:
start_idx = i
else:
end_idx = i
break
if start_idx is not None and end_idx is not None:
# Extract the code between the markers, excluding the markers themselves
code_lines = lines[start_idx + 1:end_idx]
return '\n'.join(code_lines)
# If all else fails, return the original content
return block_with_context
def _get_file_params_from_model(self, content, model=None):
    """
    Use LLM to determine if the content is code and suggest appropriate file parameters.

    Args:
        content: The content to analyze
        model: Optional model to use for the analysis (defaults to self.model)
    Returns:
        tuple: (file_name, file_type, extract_code) for backward compatibility.
        Falls back to ("output", "txt", False) with no model, or
        ("output", "md", False) when the model call / JSON parsing fails.
    """
    if model is None:
        model = self.model
    if not model:
        # Default fallback if no model is available
        return "output", "txt", False
    # NOTE: only the first 500 characters are sent to keep token usage low
    prompt = f"""
Analyze the following content and determine:
1. Is this primarily code implementation (where most of the content consists of code blocks)?
2. What would be an appropriate filename and file extension?
Content to analyze: ```
{content[:500]} # Only show first 500 chars to avoid token limits ```
{"..." if len(content) > 500 else ""}
Respond in JSON format only with the following structure:
{{
"is_code": true/false, # Whether this is primarily code implementation
"filename": "suggested_filename", # Don't include extension, english words
"extension": "appropriate_extension" # Don't include the dot, e.g., "md", "py", "js"
}}
"""
    try:
        # Create a request to the model (low temperature for stable JSON output)
        request = LLMRequest(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
            json_format=True
        )
        # Call the model using the standard interface
        response = model.call(request)
        if response.is_error:
            logger.warning(f"Error from model: {response.error_message}")
            raise Exception(f"Model error: {response.error_message}")
        # Extract JSON from response
        result = response.data["choices"][0]["message"]["content"]
        # Strip any markdown code fences wrapping the JSON
        result = self._clean_json_response(result)
        # Parse the JSON
        params = json.loads(result)
        # For backward compatibility, return tuple format
        file_name = params.get("filename", "output")
        # Remove dot from extension if present
        file_type = params.get("extension", "md").lstrip(".")
        extract_code = params.get("is_code", False)
        return file_name, file_type, extract_code
    except Exception as e:
        logger.warning(f"Error getting file parameters from model: {e}")
        # Default fallback when the model call or JSON parsing fails
        return "output", "md", False
def _get_team_name_from_context(self) -> Optional[str]:
"""
Get team name from the agent's context.
:return: Team name or None if not found
"""
if hasattr(self, 'context') and self.context:
# Try to get team name from team_context
if hasattr(self.context, 'team_context') and self.context.team_context:
return self.context.team_context.name
# Try direct team_name attribute
if hasattr(self.context, 'name'):
return self.context.name
return None
def _get_task_id_from_context(self) -> Optional[str]:
"""
Get task ID from the agent's context.
:return: Task ID or None if not found
"""
if hasattr(self, 'context') and self.context:
# Try to get task ID from task object
if hasattr(self.context, 'task') and self.context.task:
return self.context.task.id
# Try team_context's task
if hasattr(self.context, 'team_context') and self.context.team_context:
if hasattr(self.context.team_context, 'task') and self.context.team_context.task:
return self.context.team_context.task.id
return None
def _get_task_dir_from_context(self) -> Optional[str]:
"""
Get task directory name from the team context.
:return: Task directory name or None if not found
"""
if hasattr(self, 'context') and self.context:
# Try to get from team_context
if hasattr(self.context, 'team_context') and self.context.team_context:
if hasattr(self.context.team_context, 'task_short_name') and self.context.team_context.task_short_name:
return self.context.team_context.task_short_name
# Fall back to task ID if available
return self._get_task_id_from_context()
def _extract_content_from_context(self) -> str:
"""
Extract content from the agent's context.
:return: Extracted content
"""
# Check if we have access to the agent's context
if not hasattr(self, 'context') or not self.context:
return ""
# Try to get the most recent final answer from the agent
if hasattr(self.context, 'final_answer') and self.context.final_answer:
return self.context.final_answer
# Try to get the most recent final answer from team context
if hasattr(self.context, 'team_context') and self.context.team_context:
if hasattr(self.context.team_context, 'agent_outputs') and self.context.team_context.agent_outputs:
latest_output = self.context.team_context.agent_outputs[-1].output
return latest_output
# If we have action history, try to get the most recent final answer
if hasattr(self.context, 'action_history') and self.context.action_history:
for action in reversed(self.context.action_history):
if "final_answer" in action and action["final_answer"]:
return action["final_answer"]
return ""
def _extract_code_blocks(self, content: str) -> str:
"""
Extract code blocks from markdown content.
:param content: The content to extract code blocks from
:return: Extracted code blocks
"""
# Pattern to match markdown code blocks
code_block_pattern = r'```(?:\w+)?\n([\s\S]*?)\n```'
# Find all code blocks
code_blocks = re.findall(code_block_pattern, content)
if code_blocks:
# Join all code blocks with newlines
return '\n\n'.join(code_blocks)
return content # Return original content if no code blocks found
def _infer_file_name(self, content: str) -> str:
    """
    Derive a file name suggestion from the content itself.

    :param content: The content to analyze.
    :return: A suggested file name.
    """
    # Markdown title -> use it as the name
    md_title = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    if md_title:
        return self._sanitize_filename(md_title.group(1).strip())
    # Class/function definition -> use the identifier
    definition = re.search(r'(class|def|function)\s+(\w+)', content)
    if definition:
        return self._sanitize_filename(definition.group(2))
    # Fall back to a generic name keyed off the detected content type
    if self._is_likely_code(content):
        return "code"
    if self._is_likely_markdown(content):
        return "document"
    if self._is_likely_json(content):
        return "data"
    return "output"
def _infer_file_type(self, content: str) -> str:
"""
Infer the file type/extension from the content.
:param content: The content to analyze.
:return: A suggested file extension.
"""
# Check for common programming language patterns
if re.search(r'(import\s+[a-zA-Z0-9_]+|from\s+[a-zA-Z0-9_\.]+\s+import)', content):
return "py" # Python
elif re.search(r'(public\s+class|private\s+class|protected\s+class)', content):
return "java" # Java
elif re.search(r'(function\s+\w+\s*\(|const\s+\w+\s*=|let\s+\w+\s*=|var\s+\w+\s*=)', content):
return "js" # JavaScript
elif re.search(r'(<html|<body|<div|<p>)', content):
return "html" # HTML
elif re.search(r'(#include\s+<\w+\.h>|int\s+main\s*\()', content):
return "cpp" # C/C++
# Check for markdown
if self._is_likely_markdown(content):
return "md"
# Check for JSON
if self._is_likely_json(content):
return "json"
# Default to text
return "txt"
def _is_likely_code(self, content: str) -> bool:
"""Check if the content is likely code."""
# First check for common HTML/XML patterns
if content.strip().startswith(("<!DOCTYPE", "<html", "<?xml", "<head", "<body")):
return True
code_patterns = [
r'(class|def|function|import|from|public|private|protected|#include)',
r'(\{\s*\n|\}\s*\n|\[\s*\n|\]\s*\n)',
r'(if\s*\(|for\s*\(|while\s*\()',
r'(<\w+>.*?</\w+>)', # HTML/XML tags
r'(var|let|const)\s+\w+\s*=', # JavaScript variable declarations
r'#\s*\w+', # CSS ID selectors or Python comments
r'\.\w+\s*\{', # CSS class selectors
r'@media|@import|@font-face' # CSS at-rules
]
return any(re.search(pattern, content) for pattern in code_patterns)
def _is_likely_markdown(self, content: str) -> bool:
"""Check if the content is likely markdown."""
md_patterns = [
r'^#\s+.+$', # Headers
r'^\*\s+.+$', # Unordered lists
r'^\d+\.\s+.+$', # Ordered lists
r'\[.+\]\(.+\)', # Links
r'!\[.+\]\(.+\)' # Images
]
return any(re.search(pattern, content, re.MULTILINE) for pattern in md_patterns)
def _is_likely_json(self, content: str) -> bool:
"""Check if the content is likely JSON."""
try:
content = content.strip()
if (content.startswith('{') and content.endswith('}')) or (
content.startswith('[') and content.endswith(']')):
json.loads(content)
return True
except:
pass
return False
def _sanitize_filename(self, name: str) -> str:
"""
Sanitize a string to be used as a filename.
:param name: The string to sanitize.
:return: A sanitized filename.
"""
# Replace spaces with underscores
name = name.replace(' ', '_')
# Remove invalid characters
name = re.sub(r'[^\w\-\.]', '', name)
# Limit length
if len(name) > 50:
name = name[:50]
return name.lower()
def _process_file_path(self, file_path: str) -> Tuple[str, str]:
"""
Process a file path to extract the file name and type, and create directories if needed.
:param file_path: The file path to process
:return: Tuple of (file_name, file_type)
"""
# Get the file name and extension
file_name = os.path.basename(file_path)
file_type = os.path.splitext(file_name)[1].lstrip('.')
return os.path.splitext(file_name)[0], file_type

View File

@@ -23,7 +23,7 @@ class Ls(BaseTool):
"properties": {
"path": {
"type": "string",
"description": "Directory to list (default: current directory)"
"description": "Directory to list. IMPORTANT: Relative paths are based on workspace directory. To access directories outside workspace, use absolute paths starting with ~ or /."
},
"limit": {
"type": "integer",
@@ -51,6 +51,13 @@ class Ls(BaseTool):
absolute_path = self._resolve_path(path)
if not os.path.exists(absolute_path):
# Provide helpful hint if using relative path
if not os.path.isabs(path) and not path.startswith('~'):
return ToolResult.fail(
f"Error: Path not found: {path}\n"
f"Resolved to: {absolute_path}\n"
f"Hint: Relative paths are based on workspace ({self.cwd}). For files outside workspace, use absolute paths."
)
return ToolResult.fail(f"Error: Path not found: {path}")
if not os.path.isdir(absolute_path):

View File

@@ -22,7 +22,7 @@ class Read(BaseTool):
"properties": {
"path": {
"type": "string",
"description": "Path to the file to read (relative or absolute)"
"description": "Path to the file to read. IMPORTANT: Relative paths are based on workspace directory. To access files outside workspace, use absolute paths starting with ~ or /."
},
"offset": {
"type": "integer",
@@ -63,6 +63,13 @@ class Read(BaseTool):
# Check if file exists
if not os.path.exists(absolute_path):
# Provide helpful hint if using relative path
if not os.path.isabs(path) and not path.startswith('~'):
return ToolResult.fail(
f"Error: File not found: {path}\n"
f"Resolved to: {absolute_path}\n"
f"Hint: Relative paths are based on workspace ({self.cwd}). For files outside workspace, use absolute paths."
)
return ToolResult.fail(f"Error: File not found: {path}")
# Check if readable

View File

@@ -46,7 +46,7 @@ class WebFetch(BaseTool):
def __init__(self, config: dict = None):
self.config = config or {}
self.timeout = self.config.get("timeout", 30)
self.timeout = self.config.get("timeout", 20)
self.max_redirects = self.config.get("max_redirects", 3)
self.user_agent = self.config.get(
"user_agent",

View File

@@ -245,8 +245,7 @@ class GoogleGeminiBot(Bot):
gen_config = {}
if kwargs.get("temperature") is not None:
gen_config["temperature"] = kwargs["temperature"]
if kwargs.get("max_tokens"):
gen_config["maxOutputTokens"] = kwargs["max_tokens"]
if gen_config:
payload["generationConfig"] = gen_config
@@ -255,7 +254,7 @@ class GoogleGeminiBot(Bot):
gemini_tools = self._convert_tools_to_gemini_rest_format(tools)
if gemini_tools:
payload["tools"] = gemini_tools
logger.info(f"[Gemini] Added {len(tools)} tools to request")
logger.debug(f"[Gemini] Added {len(tools)} tools to request")
# Make REST API call
base_url = f"{self.api_base}/v1beta"
@@ -445,6 +444,9 @@ class GoogleGeminiBot(Bot):
all_tool_calls = []
has_sent_tool_calls = False
has_content = False # Track if any content was sent
chunk_count = 0
last_finish_reason = None
last_safety_ratings = None
for line in response.iter_lines():
if not line:
@@ -461,6 +463,7 @@ class GoogleGeminiBot(Bot):
try:
chunk_data = json.loads(line)
chunk_count += 1
logger.debug(f"[Gemini] Stream chunk: {json.dumps(chunk_data, ensure_ascii=False)[:200]}")
candidates = chunk_data.get("candidates", [])
@@ -469,6 +472,13 @@ class GoogleGeminiBot(Bot):
continue
candidate = candidates[0]
# 记录 finish_reason 和 safety_ratings
if "finishReason" in candidate:
last_finish_reason = candidate["finishReason"]
if "safetyRatings" in candidate:
last_safety_ratings = candidate["safetyRatings"]
content = candidate.get("content", {})
parts = content.get("parts", [])
@@ -512,7 +522,7 @@ class GoogleGeminiBot(Bot):
# Send tool calls if any were collected
if all_tool_calls and not has_sent_tool_calls:
logger.info(f"[Gemini] Stream detected {len(all_tool_calls)} tool calls")
logger.debug(f"[Gemini] Stream detected {len(all_tool_calls)} tool calls")
yield {
"id": f"chatcmpl-{time.time()}",
"object": "chat.completion.chunk",
@@ -526,8 +536,17 @@ class GoogleGeminiBot(Bot):
}
has_sent_tool_calls = True
# Log summary
logger.info(f"[Gemini] Stream complete: has_content={has_content}, tool_calls={len(all_tool_calls)}")
# Log summary (only if there's something interesting)
if not has_content and not all_tool_calls:
logger.debug(f"[Gemini] Stream complete: has_content={has_content}, tool_calls={len(all_tool_calls)}")
elif all_tool_calls:
logger.debug(f"[Gemini] Stream complete: {len(all_tool_calls)} tool calls")
else:
logger.debug(f"[Gemini] Stream complete: text response")
# 如果返回空响应,记录详细警告
if not has_content and not all_tool_calls:
logger.warning(f"[Gemini] ⚠️ Empty response detected!")
# Final chunk
yield {

View File

@@ -4,6 +4,7 @@
import re
import time
import requests
import json
import config
from bot.bot import Bot
from bot.openai_compatible_bot import OpenAICompatibleBot
@@ -463,7 +464,7 @@ class LinkAISessionManager(SessionManager):
session.add_query(query)
session.add_reply(reply)
try:
max_tokens = conf().get("conversation_max_tokens", 2500)
max_tokens = conf().get("conversation_max_tokens", 8000)
tokens_cnt = session.discard_exceeding(max_tokens, total_tokens)
logger.debug(f"[LinkAI] chat history, before tokens={total_tokens}, now tokens={tokens_cnt}")
except Exception as e:
@@ -504,6 +505,26 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs):
Formatted response in OpenAI format or generator for streaming
"""
try:
# Convert messages from Claude format to OpenAI format
# This is important because Agent uses Claude format internally
messages = self._convert_messages_to_openai_format(messages)
# Convert tools from Claude format to OpenAI format
if tools:
tools = self._convert_tools_to_openai_format(tools)
# Handle system prompt (OpenAI uses system message, Claude uses separate parameter)
system_prompt = kwargs.get('system')
if system_prompt:
# Add system message at the beginning if not already present
if not messages or messages[0].get('role') != 'system':
messages = [{"role": "system", "content": system_prompt}] + messages
else:
# Replace existing system message
messages[0] = {"role": "system", "content": system_prompt}
logger.debug(f"[LinkAI] messages: {len(messages)}, tools: {len(tools) if tools else 0}, stream: {stream}")
# Build request parameters (LinkAI uses OpenAI-compatible format)
body = {
"messages": messages,
@@ -514,17 +535,7 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs):
"presence_penalty": kwargs.get("presence_penalty", conf().get("presence_penalty", 0.0)),
"stream": stream
}
# Add max_tokens if specified
if kwargs.get("max_tokens"):
body["max_tokens"] = kwargs["max_tokens"]
# Add app_code if provided
app_code = kwargs.get("app_code", conf().get("linkai_app_code"))
if app_code:
body["app_code"] = app_code
# Add tools if provided (OpenAI-compatible format)
if tools:
body["tools"] = tools
body["tool_choice"] = kwargs.get("tool_choice", "auto")
@@ -567,8 +578,8 @@ def _handle_linkai_sync_response(self, base_url, headers, body):
if res.status_code == 200:
response = res.json()
logger.info(f"[LinkAI] call_with_tools reply, model={response.get('model')}, "
f"total_tokens={response.get('usage', {}).get('total_tokens', 0)}")
logger.debug(f"[LinkAI] reply: model={response.get('model')}, "
f"tokens={response.get('usage', {}).get('total_tokens', 0)}")
# LinkAI response is already in OpenAI-compatible format
return response

View File

@@ -66,14 +66,22 @@ class AgentLLMModel(LLMModel):
self.bridge = bridge
self.bot_type = bot_type
self._bot = None
self._use_linkai = conf().get("use_linkai", False) and conf().get("linkai_api_key")
@property
def bot(self):
"""Lazy load the bot and enhance it with tool calling if needed"""
if self._bot is None:
self._bot = self.bridge.get_bot(self.bot_type)
# Automatically add tool calling support if not present
self._bot = add_openai_compatible_support(self._bot)
# If use_linkai is enabled, use LinkAI bot directly
if self._use_linkai:
self._bot = self.bridge.find_chat_bot(const.LINKAI)
else:
self._bot = self.bridge.get_bot(self.bot_type)
# Automatically add tool calling support if not present
self._bot = add_openai_compatible_support(self._bot)
# Log bot info
bot_name = type(self._bot).__name__
return self._bot
def call(self, request: LLMRequest):
@@ -88,11 +96,18 @@ class AgentLLMModel(LLMModel):
kwargs = {
'messages': request.messages,
'tools': getattr(request, 'tools', None),
'stream': False
'stream': False,
'model': self.model # Pass model parameter
}
# Only pass max_tokens if it's explicitly set
if request.max_tokens is not None:
kwargs['max_tokens'] = request.max_tokens
# Extract system prompt if present
system_prompt = getattr(request, 'system', None)
if system_prompt:
kwargs['system'] = system_prompt
response = self.bot.call_with_tools(**kwargs)
return self._format_response(response)
else:
@@ -122,7 +137,8 @@ class AgentLLMModel(LLMModel):
'messages': request.messages,
'tools': getattr(request, 'tools', None),
'stream': True,
'max_tokens': max_tokens
'max_tokens': max_tokens,
'model': self.model # Pass model parameter
}
# Add system prompt if present
@@ -155,13 +171,15 @@ class AgentLLMModel(LLMModel):
class AgentBridge:
"""
Bridge class that integrates single super Agent with COW
Bridge class that integrates super Agent with COW
Manages multiple agent instances per session for conversation isolation
"""
def __init__(self, bridge: Bridge):
self.bridge = bridge
self.agents = {} # session_id -> Agent instance mapping
self.default_agent = None # For backward compatibility (no session_id)
self.agent: Optional[Agent] = None
def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent:
"""
Create the super agent with COW integration
@@ -193,8 +211,8 @@ class AgentBridge:
except Exception as e:
logger.warning(f"[AgentBridge] Failed to load tool {tool_name}: {e}")
# Create the single super agent
self.agent = Agent(
# Create agent instance
agent = Agent(
system_prompt=system_prompt,
description=kwargs.get("description", "AI Super Agent"),
model=model,
@@ -209,21 +227,38 @@ class AgentBridge:
)
# Log skill loading details
if self.agent.skill_manager:
if agent.skill_manager:
logger.info(f"[AgentBridge] SkillManager initialized:")
logger.info(f"[AgentBridge] - Managed dir: {self.agent.skill_manager.managed_skills_dir}")
logger.info(f"[AgentBridge] - Workspace dir: {self.agent.skill_manager.workspace_dir}")
logger.info(f"[AgentBridge] - Total skills: {len(self.agent.skill_manager.skills)}")
for skill_name in self.agent.skill_manager.skills.keys():
logger.info(f"[AgentBridge] - Managed dir: {agent.skill_manager.managed_skills_dir}")
logger.info(f"[AgentBridge] - Workspace dir: {agent.skill_manager.workspace_dir}")
logger.info(f"[AgentBridge] - Total skills: {len(agent.skill_manager.skills)}")
for skill_name in agent.skill_manager.skills.keys():
logger.info(f"[AgentBridge] * {skill_name}")
return self.agent
return agent
def get_agent(self) -> Optional[Agent]:
"""Get the super agent, create if not exists"""
if self.agent is None:
self._init_default_agent()
return self.agent
def get_agent(self, session_id: str = None) -> Optional[Agent]:
"""
Get agent instance for the given session
Args:
session_id: Session identifier (e.g., user_id). If None, returns default agent.
Returns:
Agent instance for this session
"""
# If no session_id, use default agent (backward compatibility)
if session_id is None:
if self.default_agent is None:
self._init_default_agent()
return self.default_agent
# Check if agent exists for this session
if session_id not in self.agents:
logger.info(f"[AgentBridge] Creating new agent for session: {session_id}")
self._init_agent_for_session(session_id)
return self.agents[session_id]
def _init_default_agent(self):
"""Initialize default super agent with new prompt system"""
@@ -291,6 +326,12 @@ class AgentBridge:
tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None)
if 'memory_manager' in file_config:
tool.memory_manager = file_config['memory_manager']
# Apply API key for bocha_search tool
elif tool_name == 'bocha_search':
bocha_api_key = conf().get("bocha_api_key", "")
if bocha_api_key:
tool.config = {"bocha_api_key": bocha_api_key}
tool.api_key = bocha_api_key
tools.append(tool)
logger.debug(f"[AgentBridge] Loaded tool: {tool_name}")
except Exception as e:
@@ -307,6 +348,12 @@ class AgentBridge:
context_files = load_context_files(workspace_root)
logger.info(f"[AgentBridge] Loaded {len(context_files)} context files: {[f.path for f in context_files]}")
# Check if this is the first conversation
from agent.prompt.workspace import is_first_conversation, mark_conversation_started
is_first = is_first_conversation(workspace_root)
if is_first:
logger.info("[AgentBridge] First conversation detected")
# Build system prompt using new prompt builder
prompt_builder = PromptBuilder(
workspace_dir=workspace_root,
@@ -317,15 +364,20 @@ class AgentBridge:
runtime_info = {
"model": conf().get("model", "unknown"),
"workspace": workspace_root,
"channel": "web" # TODO: get from actual channel, default to "web" to hide if not specified
"channel": conf().get("channel_type", "unknown") # Get from config
}
system_prompt = prompt_builder.build(
tools=tools,
context_files=context_files,
memory_manager=memory_manager,
runtime_info=runtime_info
runtime_info=runtime_info,
is_first_conversation=is_first
)
# Mark conversation as started (will be saved after first user message)
if is_first:
mark_conversation_started(workspace_root)
logger.info("[AgentBridge] System prompt built successfully")
@@ -343,6 +395,127 @@ class AgentBridge:
if memory_manager:
agent.memory_manager = memory_manager
logger.info(f"[AgentBridge] Memory manager attached to agent")
# Store as default agent
self.default_agent = agent
def _init_agent_for_session(self, session_id: str):
"""
Initialize agent for a specific session
Reuses the same configuration as default agent
"""
from config import conf
import os
# Get workspace from config
workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow"))
# Initialize workspace
from agent.prompt import ensure_workspace, load_context_files, PromptBuilder
workspace_files = ensure_workspace(workspace_root, create_templates=True)
# Setup memory system
memory_manager = None
memory_tools = []
try:
from agent.memory import MemoryManager, MemoryConfig
from agent.tools import MemorySearchTool, MemoryGetTool
memory_config = MemoryConfig(
workspace_root=workspace_root,
embedding_provider="local",
embedding_model="all-MiniLM-L6-v2"
)
memory_manager = MemoryManager(memory_config)
memory_tools = [
MemorySearchTool(memory_manager),
MemoryGetTool(memory_manager)
]
except Exception as e:
logger.debug(f"[AgentBridge] Memory system not available for session {session_id}: {e}")
# Load tools
from agent.tools import ToolManager
tool_manager = ToolManager()
tool_manager.load_tools()
tools = []
file_config = {
"cwd": workspace_root,
"memory_manager": memory_manager
} if memory_manager else {"cwd": workspace_root}
for tool_name in tool_manager.tool_classes.keys():
try:
tool = tool_manager.create_tool(tool_name)
if tool:
if tool_name in ['read', 'write', 'edit', 'bash', 'grep', 'find', 'ls']:
tool.config = file_config
tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None)
if 'memory_manager' in file_config:
tool.memory_manager = file_config['memory_manager']
elif tool_name == 'bocha_search':
bocha_api_key = conf().get("bocha_api_key", "")
if bocha_api_key:
tool.config = {"bocha_api_key": bocha_api_key}
tool.api_key = bocha_api_key
tools.append(tool)
except Exception as e:
logger.warning(f"[AgentBridge] Failed to load tool {tool_name} for session {session_id}: {e}")
if memory_tools:
tools.extend(memory_tools)
# Load context files
context_files = load_context_files(workspace_root)
# Check if this is the first conversation
from agent.prompt.workspace import is_first_conversation, mark_conversation_started
is_first = is_first_conversation(workspace_root)
# Build system prompt
prompt_builder = PromptBuilder(
workspace_dir=workspace_root,
language="zh"
)
runtime_info = {
"model": conf().get("model", "unknown"),
"workspace": workspace_root,
"channel": conf().get("channel_type", "unknown")
}
system_prompt = prompt_builder.build(
tools=tools,
context_files=context_files,
memory_manager=memory_manager,
runtime_info=runtime_info,
is_first_conversation=is_first
)
if is_first:
mark_conversation_started(workspace_root)
# Create agent for this session
agent = self.create_agent(
system_prompt=system_prompt,
tools=tools,
max_steps=50,
output_mode="logger",
workspace_dir=workspace_root,
enable_skills=True
)
if memory_manager:
agent.memory_manager = memory_manager
# Store agent for this session
self.agents[session_id] = agent
logger.info(f"[AgentBridge] Agent created for session: {session_id}")
def agent_reply(self, query: str, context: Context = None,
on_event=None, clear_history: bool = False) -> Reply:
@@ -351,7 +524,7 @@ class AgentBridge:
Args:
query: User query
context: COW context (optional)
context: COW context (optional, contains session_id for user isolation)
on_event: Event callback (optional)
clear_history: Whether to clear conversation history
@@ -359,8 +532,13 @@ class AgentBridge:
Reply object
"""
try:
# Get agent (will auto-initialize if needed)
agent = self.get_agent()
# Extract session_id from context for user isolation
session_id = None
if context:
session_id = context.kwargs.get("session_id") or context.get("session_id")
# Get agent for this session (will auto-initialize if needed)
agent = self.get_agent(session_id=session_id)
if not agent:
return Reply(ReplyType.ERROR, "Failed to initialize super agent")
@@ -375,4 +553,21 @@ class AgentBridge:
except Exception as e:
logger.error(f"Agent reply error: {e}")
return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
return Reply(ReplyType.ERROR, f"Agent error: {str(e)}")
def clear_session(self, session_id: str):
"""
Clear a specific session's agent and conversation history
Args:
session_id: Session identifier to clear
"""
if session_id in self.agents:
logger.info(f"[AgentBridge] Clearing session: {session_id}")
del self.agents[session_id]
def clear_all_sessions(self):
"""Clear all agent sessions"""
logger.info(f"[AgentBridge] Clearing all sessions ({len(self.agents)} total)")
self.agents.clear()
self.default_agent = None

View File

@@ -47,6 +47,10 @@ class Channel(object):
try:
logger.info("[Channel] Using agent mode")
# Add channel_type to context if not present
if context and "channel_type" not in context:
context["channel_type"] = self.channel_type
# Use agent bridge to handle the query
return Bridge().fetch_agent_reply(
query=query,

View File

@@ -49,8 +49,6 @@ class WebChannel(ChatChannel):
self.msg_id_counter = 0 # 添加消息ID计数器
self.session_queues = {} # 存储session_id到队列的映射
self.request_to_session = {} # 存储request_id到session_id的映射
# web channel无需前缀
conf()["single_chat_prefix"] = [""]
def _generate_msg_id(self):
@@ -122,18 +120,30 @@ class WebChannel(ChatChannel):
if session_id not in self.session_queues:
self.session_queues[session_id] = Queue()
# Web channel 不需要前缀,确保消息能通过前缀检查
trigger_prefixs = conf().get("single_chat_prefix", [""])
if check_prefix(prompt, trigger_prefixs) is None:
# 如果没有匹配到前缀,给消息加上第一个前缀
if trigger_prefixs:
prompt = trigger_prefixs[0] + prompt
logger.debug(f"[WebChannel] Added prefix to message: {prompt}")
# 创建消息对象
msg = WebMessage(self._generate_msg_id(), prompt)
msg.from_user_id = session_id # 使用会话ID作为用户ID
# 创建上下文
context = self._compose_context(ContextType.TEXT, prompt, msg=msg)
# 创建上下文,明确指定 isgroup=False
context = self._compose_context(ContextType.TEXT, prompt, msg=msg, isgroup=False)
# 检查 context 是否为 None可能被插件过滤等
if context is None:
logger.warning(f"[WebChannel] Context is None for session {session_id}, message may be filtered")
return json.dumps({"status": "error", "message": "Message was filtered"})
# 添加必要的字段
# 覆盖必要的字段_compose_context 会设置默认值,但我们需要使用实际的 session_id
context["session_id"] = session_id
context["receiver"] = session_id
context["request_id"] = request_id
context["isgroup"] = False # 添加 isgroup 字段
context["receiver"] = session_id # 添加 receiver 字段
# 异步处理消息 - 只传递上下文
threading.Thread(target=self.produce, args=(context,)).start()

View File

@@ -185,7 +185,8 @@ available_setting = {
"Minimax_base_url": "",
"web_port": 9899,
"agent": False, # 是否开启Agent模式
"agent_workspace": "~/cow" # agent工作空间路径用于存储skills、memory等
"agent_workspace": "~/cow", # agent工作空间路径用于存储skills、memory等
"bocha_api_key": ""
}

View File

@@ -68,10 +68,11 @@ skill-name/
- Must test scripts before including
**references/** - When to include:
- Documentation for agent to reference
- Database schemas, API docs, domain knowledge
- **ONLY** when documentation is too large for SKILL.md (>500 lines)
- Database schemas, complex API specs that agent needs to reference
- Agent reads these files into context as needed
- For large files (>10k words), include grep patterns in SKILL.md
- **NOT for**: API reference docs, usage examples, tutorials (put in SKILL.md instead)
- **Rule of thumb**: If it fits in SKILL.md, don't create a separate reference file
**assets/** - When to include:
- Files used in output (not loaded to context)
@@ -82,11 +83,15 @@ skill-name/
### What NOT to Include
Do NOT create auxiliary documentation:
- README.md
- INSTALLATION_GUIDE.md
- CHANGELOG.md
- Other non-essential files
Do NOT create auxiliary documentation files:
- README.md - Instructions belong in SKILL.md
- INSTALLATION_GUIDE.md - Setup info belongs in SKILL.md
- CHANGELOG.md - Not needed for local skills
- API_REFERENCE.md - Put API docs directly in SKILL.md
- USAGE_EXAMPLES.md - Put examples directly in SKILL.md
- Any other documentation files - Everything goes in SKILL.md unless it's too large
**Critical Rule**: Only create files that the agent will actually execute (scripts) or that are too large for SKILL.md (references). Documentation, examples, and guides ALL belong in SKILL.md.
## Skill Creation Process
@@ -133,22 +138,31 @@ To turn concrete examples into an effective skill, analyze each example by:
1. Considering how to execute on the example from scratch
2. Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly
**Planning Checklist**:
-**Always needed**: SKILL.md with clear description and usage instructions
-**scripts/**: Only if code needs to be executed (not just shown as examples)
-**references/**: Rarely needed - only if documentation is >500 lines and can't fit in SKILL.md
-**assets/**: Only if files are used in output (templates, boilerplate, etc.)
Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows:
1. Rotating a PDF requires re-writing the same code each time
2. A `scripts/rotate_pdf.py` script would be helpful to store in the skill
3. ❌ Don't create `references/api-docs.md` - put API info in SKILL.md instead
Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows:
1. Writing a frontend webapp requires the same boilerplate HTML/React each time
2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill
3. ❌ Don't create `references/usage-examples.md` - put examples in SKILL.md instead
Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows:
1. Querying BigQuery requires re-discovering the table schemas and relationships each time
2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill
2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill (ONLY because schemas are very large)
3. ❌ Don't create separate `references/query-examples.md` - put examples in SKILL.md instead
To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets.
To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets. **Default to putting everything in SKILL.md unless there's a compelling reason to separate it.**
### Step 3: Initialize the Skill
@@ -200,6 +214,12 @@ These files contain established best practices for effective skill design.
To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`.
**Important Guidelines**:
- **scripts/**: Only create scripts that will be executed. Test all scripts before including.
- **references/**: ONLY create if documentation is too large for SKILL.md (>500 lines). Most skills don't need this.
- **assets/**: Only include files used in output (templates, icons, etc.)
- **Default approach**: Put everything in SKILL.md unless there's a specific reason not to.
Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work, while keeping time to completion reasonable.
If you used `--examples`, delete any placeholder files that are not needed for the skill. Only create resource directories that are actually required.