From 0e53ba4311ae28be2146aa4dca64c6d88f5fa960 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 14:59:55 +0800 Subject: [PATCH 01/10] fix: gemini error process --- agent/prompt/builder.py | 6 + agent/protocol/agent_stream.py | 12 +- agent/tools/file_save/__init__.py | 3 - agent/tools/file_save/file_save.py | 770 ----------------------------- bot/gemini/google_gemini_bot.py | 9 + 5 files changed, 26 insertions(+), 774 deletions(-) delete mode 100644 agent/tools/file_save/__init__.py delete mode 100644 agent/tools/file_save/file_save.py diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index 1a0cc67..f78d4eb 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -352,6 +352,12 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "", f"你的工作目录是: `{workspace_dir}`", "", + "**路径使用规则** (非常重要):", + "", + "- **工作空间内的文件**: 使用相对路径(如 `SOUL.md`、`memory/daily.md`)", + "- **工作空间外的文件**: 必须使用绝对路径(如 `~/project/code.py`、`/etc/config`)", + "- **不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置", + "", "除非用户明确指示,否则将此目录视为文件操作的全局工作空间。", "", "**重要说明 - 文件已自动加载**:", diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index 2cf1a54..5633d53 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -137,8 +137,18 @@ class AgentStreamExecutor: # No tool calls, end loop if not tool_calls: - if assistant_msg: + # 检查是否返回了空响应 + if not assistant_msg: + logger.warning(f"[Agent] LLM returned empty response (no content and no tool calls)") + + # 生成通用的友好提示 + final_response = ( + "抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。" + ) + logger.info(f"💭 Generated fallback response for empty LLM output") + else: logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}") + logger.info(f"✅ 完成 (无工具调用)") self._emit_event("turn_end", { "turn": turn, diff --git a/agent/tools/file_save/__init__.py b/agent/tools/file_save/__init__.py deleted file mode 100644 index d00d726..0000000 --- a/agent/tools/file_save/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .file_save import FileSave - -__all__ = ['FileSave'] diff --git a/agent/tools/file_save/file_save.py b/agent/tools/file_save/file_save.py deleted file mode 100644 index 5e2cdae..0000000 --- a/agent/tools/file_save/file_save.py +++ /dev/null @@ -1,770 +0,0 @@ -import os -import time -import re -import json -from pathlib import Path -from typing import Dict, Any, Optional, Tuple - -from agent.tools.base_tool import BaseTool, ToolResult, ToolStage -from agent.models import LLMRequest -from common.log import logger - - -class FileSave(BaseTool): - """Tool for saving content to files in the workspace directory.""" - - name = "file_save" - description = "Save the agent's output to a file in the workspace directory. Content is automatically extracted from the agent's previous outputs." - - # Set as post-process stage tool - stage = ToolStage.POST_PROCESS - - params = { - "type": "object", - "properties": { - "file_name": { - "type": "string", - "description": "Optional. The name of the file to save. If not provided, a name will be generated based on the content." - }, - "file_type": { - "type": "string", - "description": "Optional. The type/extension of the file (e.g., 'txt', 'md', 'py', 'java'). If not provided, it will be inferred from the content." - }, - "extract_code": { - "type": "boolean", - "description": "Optional. If true, will attempt to extract code blocks from the content. Default is false." - } - }, - "required": [] # No required fields, as everything can be extracted from context - } - - def __init__(self): - self.context = None - self.config = {} - self.workspace_dir = Path("workspace") - - def execute(self, params: Dict[str, Any]) -> ToolResult: - """ - Save content to a file in the workspace directory. - - :param params: The parameters for the file output operation. - :return: Result of the operation. - """ - # Extract content from context - if not hasattr(self, 'context') or not self.context: - return ToolResult.fail("Error: No context available to extract content from.") - - content = self._extract_content_from_context() - - # If no content could be extracted, return error - if not content: - return ToolResult.fail("Error: Couldn't extract content from context.") - - # Use model to determine file parameters - try: - task_dir = self._get_task_dir_from_context() - file_name, file_type, extract_code = self._get_file_params_from_model(content) - except Exception as e: - logger.error(f"Error determining file parameters: {str(e)}") - # Fall back to manual parameter extraction - task_dir = params.get("task_dir") or self._get_task_id_from_context() or f"task_{int(time.time())}" - file_name = params.get("file_name") or self._infer_file_name(content) - file_type = params.get("file_type") or self._infer_file_type(content) - extract_code = params.get("extract_code", False) - - # Get team_name from context - team_name = self._get_team_name_from_context() or "default_team" - - # Create directory structure - task_dir_path = self.workspace_dir / team_name / task_dir - task_dir_path.mkdir(parents=True, exist_ok=True) - - if extract_code: - # Save the complete content as markdown - md_file_name = f"{file_name}.md" - md_file_path = task_dir_path / md_file_name - - # Write content to file - with open(md_file_path, 'w', encoding='utf-8') as f: - f.write(content) - - return self._handle_multiple_code_blocks(content) - - # Ensure file_name has the correct extension - if file_type and not file_name.endswith(f".{file_type}"): - file_name = f"{file_name}.{file_type}" - - # Create the full file path - file_path = task_dir_path / file_name - - # Get absolute path for storage in team_context - abs_file_path = file_path.absolute() - - try: - # Write content to file - with open(file_path, 'w', encoding='utf-8') as f: - f.write(content) - - # Update the current agent's final_answer to include file information - if hasattr(self.context, 'team_context'): - # Store with absolute path in team_context - self.context.team_context.agent_outputs[-1].output += f"\n\nSaved file: {abs_file_path}" - - return ToolResult.success({ - "status": "success", - "file_path": str(file_path) # Return relative path in result - }) - - except Exception as e: - return ToolResult.fail(f"Error saving file: {str(e)}") - - def _handle_multiple_code_blocks(self, content: str) -> ToolResult: - """ - Handle content with multiple code blocks, extracting and saving each as a separate file. - - :param content: The content containing multiple code blocks - :return: Result of the operation - """ - # Extract code blocks with context (including potential file name information) - code_blocks_with_context = self._extract_code_blocks_with_context(content) - - if not code_blocks_with_context: - return ToolResult.fail("No code blocks found in the content.") - - # Get task directory and team name - task_dir = self._get_task_dir_from_context() or f"task_{int(time.time())}" - team_name = self._get_team_name_from_context() or "default_team" - - # Create directory structure - task_dir_path = self.workspace_dir / team_name / task_dir - task_dir_path.mkdir(parents=True, exist_ok=True) - - saved_files = [] - - for block_with_context in code_blocks_with_context: - try: - # Use model to determine file name for this code block - block_file_name, block_file_type = self._get_filename_for_code_block(block_with_context) - - # Clean the code block (remove md code markers) - clean_code = self._clean_code_block(block_with_context) - - # Ensure file_name has the correct extension - if block_file_type and not block_file_name.endswith(f".{block_file_type}"): - block_file_name = f"{block_file_name}.{block_file_type}" - - # Create the full file path (no subdirectories) - file_path = task_dir_path / block_file_name - - # Get absolute path for storage in team_context - abs_file_path = file_path.absolute() - - # Write content to file - with open(file_path, 'w', encoding='utf-8') as f: - f.write(clean_code) - - saved_files.append({ - "file_path": str(file_path), - "abs_file_path": str(abs_file_path), # Store absolute path for internal use - "file_name": block_file_name, - "size": len(clean_code), - "status": "success", - "type": "code" - }) - - except Exception as e: - logger.error(f"Error saving code block: {str(e)}") - # Continue with the next block even if this one fails - - if not saved_files: - return ToolResult.fail("Failed to save any code blocks.") - - # Update the current agent's final_answer to include files information - if hasattr(self, 'context') and self.context: - # If the agent has a final_answer attribute, append the files info to it - if hasattr(self.context, 'team_context'): - # Use relative paths for display - display_info = f"\n\nSaved files to {task_dir_path}:\n" + "\n".join( - [f"- {f['file_path']}" for f in saved_files]) - - # Check if we need to append the info - if not self.context.team_context.agent_outputs[-1].output.endswith(display_info): - # Store with absolute paths in team_context - abs_info = f"\n\nSaved files to {task_dir_path.absolute()}:\n" + "\n".join( - [f"- {f['abs_file_path']}" for f in saved_files]) - self.context.team_context.agent_outputs[-1].output += abs_info - - result = { - "status": "success", - "files": [{"file_path": f["file_path"]} for f in saved_files] - } - - return ToolResult.success(result) - - def _extract_code_blocks_with_context(self, content: str) -> list: - """ - Extract code blocks from content, including context lines before the block. - - :param content: The content to extract code blocks from - :return: List of code blocks with context - """ - # Check if content starts with Tuple[str, str]: - """ - Determine the file name for a code block. - - :param block_with_context: The code block with context lines - :return: Tuple of (file_name, file_type) - """ - # Define common code file extensions - COMMON_CODE_EXTENSIONS = { - 'py', 'js', 'java', 'c', 'cpp', 'h', 'hpp', 'cs', 'go', 'rb', 'php', - 'html', 'css', 'ts', 'jsx', 'tsx', 'vue', 'sh', 'sql', 'json', 'xml', - 'yaml', 'yml', 'md', 'rs', 'swift', 'kt', 'scala', 'pl', 'r', 'lua' - } - - # Split the block into lines to examine only the context around code block markers - lines = block_with_context.split('\n') - - # Find the code block start marker line index - start_marker_idx = -1 - for i, line in enumerate(lines): - if line.strip().startswith('```') and not line.strip() == '```': - start_marker_idx = i - break - - if start_marker_idx == -1: - # No code block marker found - return "", "" - - # Extract the language from the code block marker - code_marker = lines[start_marker_idx].strip() - language = "" - if len(code_marker) > 3: - language = code_marker[3:].strip().split('=')[0].strip() - - # Define the context range (5 lines before and 2 after the marker) - context_start = max(0, start_marker_idx - 5) - context_end = min(len(lines), start_marker_idx + 3) - - # Extract only the relevant context lines - context_lines = lines[context_start:context_end] - - # First, check for explicit file headers like "## filename.ext" - for line in context_lines: - # Match patterns like "## filename.ext" or "# filename.ext" - header_match = re.search(r'^\s*#{1,6}\s+([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)\s*$', line) - if header_match: - file_name = header_match.group(1) - file_type = os.path.splitext(file_name)[1].lstrip('.') - if file_type in COMMON_CODE_EXTENSIONS: - return os.path.splitext(file_name)[0], file_type - - # Simple patterns to match explicit file names in the context - file_patterns = [ - # Match explicit file names in headers or text - r'(?:file|filename)[:=\s]+[\'"]?([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)[\'"]?', - # Match language=filename.ext in code markers - r'language=([a-zA-Z0-9_-]+\.[a-zA-Z0-9]+)', - # Match standalone filenames with extensions - r'\b([a-zA-Z0-9_-]+\.(py|js|java|c|cpp|h|hpp|cs|go|rb|php|html|css|ts|jsx|tsx|vue|sh|sql|json|xml|yaml|yml|md|rs|swift|kt|scala|pl|r|lua))\b', - # Match file paths in comments - r'#\s*([a-zA-Z0-9_/-]+\.[a-zA-Z0-9]+)' - ] - - # Check each context line for file name patterns - for line in context_lines: - line = line.strip() - for pattern in file_patterns: - matches = re.findall(pattern, line) - if matches: - for match in matches: - if isinstance(match, tuple): - # If the match is a tuple (filename, extension) - file_name = match[0] - file_type = match[1] - # Verify it's not a code reference like Direction.DOWN - if not any(keyword in file_name for keyword in ['class.', 'enum.', 'import.']): - return os.path.splitext(file_name)[0], file_type - else: - # If the match is a string (full filename) - file_name = match - file_type = os.path.splitext(file_name)[1].lstrip('.') - # Verify it's not a code reference - if file_type in COMMON_CODE_EXTENSIONS and not any( - keyword in file_name for keyword in ['class.', 'enum.', 'import.']): - return os.path.splitext(file_name)[0], file_type - - # If no explicit file name found, use LLM to infer from code content - # Extract the code content - code_content = block_with_context - - # Get the first 20 lines of code for LLM analysis - code_lines = code_content.split('\n') - code_preview = '\n'.join(code_lines[:20]) - - # Get the model to use - model_to_use = None - if hasattr(self, 'context') and self.context: - if hasattr(self.context, 'model') and self.context.model: - model_to_use = self.context.model - elif hasattr(self.context, 'team_context') and self.context.team_context: - if hasattr(self.context.team_context, 'model') and self.context.team_context.model: - model_to_use = self.context.team_context.model - - # If no model is available in context, use the tool's model - if not model_to_use and hasattr(self, 'model') and self.model: - model_to_use = self.model - - if model_to_use: - # Prepare a prompt for the model - prompt = f"""Analyze the following code and determine the most appropriate file name and file type/extension. -The file name should be descriptive but concise, using snake_case (lowercase with underscores). -The file type should be a standard file extension (e.g., py, js, html, css, java). - -Code preview (first 20 lines): -{code_preview} - -Return your answer in JSON format with these fields: -- file_name: The suggested file name (without extension) -- file_type: The suggested file extension - -JSON response:""" - - # Create a request to the model - request = LLMRequest( - messages=[{"role": "user", "content": prompt}], - temperature=0, - json_format=True - ) - - try: - response = model_to_use.call(request) - - if not response.is_error: - # Clean the JSON response - json_content = self._clean_json_response(response.data["choices"][0]["message"]["content"]) - result = json.loads(json_content) - - file_name = result.get("file_name", "") - file_type = result.get("file_type", "") - - if file_name and file_type: - return file_name, file_type - except Exception as e: - logger.error(f"Error using model to determine file name: {str(e)}") - - # If we still don't have a file name, use the language as file type - if language and language in COMMON_CODE_EXTENSIONS: - timestamp = int(time.time()) - return f"code_{timestamp}", language - - # If all else fails, return empty strings - return "", "" - - def _clean_json_response(self, text: str) -> str: - """ - Clean JSON response from LLM by removing markdown code block markers. - - :param text: The text containing JSON possibly wrapped in markdown code blocks - :return: Clean JSON string - """ - # Remove markdown code block markers if present - if text.startswith("```json"): - text = text[7:] - elif text.startswith("```"): - # Find the first newline to skip the language identifier line - first_newline = text.find('\n') - if first_newline != -1: - text = text[first_newline + 1:] - - if text.endswith("```"): - text = text[:-3] - - return text.strip() - - def _clean_code_block(self, block_with_context: str) -> str: - """ - Clean a code block by removing markdown code markers and context lines. - - :param block_with_context: Code block with context lines - :return: Clean code ready for execution - """ - # Check if this is a full HTML or XML document - if block_with_context.strip().startswith((" 500 else ""} - - Respond in JSON format only with the following structure: - {{ - "is_code": true/false, # Whether this is primarily code implementation - "filename": "suggested_filename", # Don't include extension, english words - "extension": "appropriate_extension" # Don't include the dot, e.g., "md", "py", "js" - }} - """ - - try: - # Create a request to the model - request = LLMRequest( - messages=[{"role": "user", "content": prompt}], - temperature=0.1, - json_format=True - ) - - # Call the model using the standard interface - response = model.call(request) - - if response.is_error: - logger.warning(f"Error from model: {response.error_message}") - raise Exception(f"Model error: {response.error_message}") - - # Extract JSON from response - result = response.data["choices"][0]["message"]["content"] - - # Clean the JSON response - result = self._clean_json_response(result) - - # Parse the JSON - params = json.loads(result) - - # For backward compatibility, return tuple format - file_name = params.get("filename", "output") - # Remove dot from extension if present - file_type = params.get("extension", "md").lstrip(".") - extract_code = params.get("is_code", False) - - return file_name, file_type, extract_code - except Exception as e: - logger.warning(f"Error getting file parameters from model: {e}") - # Default fallback - return "output", "md", False - - def _get_team_name_from_context(self) -> Optional[str]: - """ - Get team name from the agent's context. - - :return: Team name or None if not found - """ - if hasattr(self, 'context') and self.context: - # Try to get team name from team_context - if hasattr(self.context, 'team_context') and self.context.team_context: - return self.context.team_context.name - - # Try direct team_name attribute - if hasattr(self.context, 'name'): - return self.context.name - - return None - - def _get_task_id_from_context(self) -> Optional[str]: - """ - Get task ID from the agent's context. - - :return: Task ID or None if not found - """ - if hasattr(self, 'context') and self.context: - # Try to get task ID from task object - if hasattr(self.context, 'task') and self.context.task: - return self.context.task.id - - # Try team_context's task - if hasattr(self.context, 'team_context') and self.context.team_context: - if hasattr(self.context.team_context, 'task') and self.context.team_context.task: - return self.context.team_context.task.id - - return None - - def _get_task_dir_from_context(self) -> Optional[str]: - """ - Get task directory name from the team context. - - :return: Task directory name or None if not found - """ - if hasattr(self, 'context') and self.context: - # Try to get from team_context - if hasattr(self.context, 'team_context') and self.context.team_context: - if hasattr(self.context.team_context, 'task_short_name') and self.context.team_context.task_short_name: - return self.context.team_context.task_short_name - - # Fall back to task ID if available - return self._get_task_id_from_context() - - def _extract_content_from_context(self) -> str: - """ - Extract content from the agent's context. - - :return: Extracted content - """ - # Check if we have access to the agent's context - if not hasattr(self, 'context') or not self.context: - return "" - - # Try to get the most recent final answer from the agent - if hasattr(self.context, 'final_answer') and self.context.final_answer: - return self.context.final_answer - - # Try to get the most recent final answer from team context - if hasattr(self.context, 'team_context') and self.context.team_context: - if hasattr(self.context.team_context, 'agent_outputs') and self.context.team_context.agent_outputs: - latest_output = self.context.team_context.agent_outputs[-1].output - return latest_output - - # If we have action history, try to get the most recent final answer - if hasattr(self.context, 'action_history') and self.context.action_history: - for action in reversed(self.context.action_history): - if "final_answer" in action and action["final_answer"]: - return action["final_answer"] - - return "" - - def _extract_code_blocks(self, content: str) -> str: - """ - Extract code blocks from markdown content. - - :param content: The content to extract code blocks from - :return: Extracted code blocks - """ - # Pattern to match markdown code blocks - code_block_pattern = r'```(?:\w+)?\n([\s\S]*?)\n```' - - # Find all code blocks - code_blocks = re.findall(code_block_pattern, content) - - if code_blocks: - # Join all code blocks with newlines - return '\n\n'.join(code_blocks) - - return content # Return original content if no code blocks found - - def _infer_file_name(self, content: str) -> str: - """ - Infer a file name from the content. - - :param content: The content to analyze. - :return: A suggested file name. - """ - # Check for title patterns in markdown - title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE) - if title_match: - # Convert title to a valid filename - title = title_match.group(1).strip() - return self._sanitize_filename(title) - - # Check for class/function definitions in code - code_match = re.search(r'(class|def|function)\s+(\w+)', content) - if code_match: - return self._sanitize_filename(code_match.group(2)) - - # Default name based on content type - if self._is_likely_code(content): - return "code" - elif self._is_likely_markdown(content): - return "document" - elif self._is_likely_json(content): - return "data" - else: - return "output" - - def _infer_file_type(self, content: str) -> str: - """ - Infer the file type/extension from the content. - - :param content: The content to analyze. - :return: A suggested file extension. - """ - # Check for common programming language patterns - if re.search(r'(import\s+[a-zA-Z0-9_]+|from\s+[a-zA-Z0-9_\.]+\s+import)', content): - return "py" # Python - elif re.search(r'(public\s+class|private\s+class|protected\s+class)', content): - return "java" # Java - elif re.search(r'(function\s+\w+\s*\(|const\s+\w+\s*=|let\s+\w+\s*=|var\s+\w+\s*=)', content): - return "js" # JavaScript - elif re.search(r'()', content): - return "html" # HTML - elif re.search(r'(#include\s+<\w+\.h>|int\s+main\s*\()', content): - return "cpp" # C/C++ - - # Check for markdown - if self._is_likely_markdown(content): - return "md" - - # Check for JSON - if self._is_likely_json(content): - return "json" - - # Default to text - return "txt" - - def _is_likely_code(self, content: str) -> bool: - """Check if the content is likely code.""" - # First check for common HTML/XML patterns - if content.strip().startswith((".*?)', # HTML/XML tags - r'(var|let|const)\s+\w+\s*=', # JavaScript variable declarations - r'#\s*\w+', # CSS ID selectors or Python comments - r'\.\w+\s*\{', # CSS class selectors - r'@media|@import|@font-face' # CSS at-rules - ] - return any(re.search(pattern, content) for pattern in code_patterns) - - def _is_likely_markdown(self, content: str) -> bool: - """Check if the content is likely markdown.""" - md_patterns = [ - r'^#\s+.+$', # Headers - r'^\*\s+.+$', # Unordered lists - r'^\d+\.\s+.+$', # Ordered lists - r'\[.+\]\(.+\)', # Links - r'!\[.+\]\(.+\)' # Images - ] - return any(re.search(pattern, content, re.MULTILINE) for pattern in md_patterns) - - def _is_likely_json(self, content: str) -> bool: - """Check if the content is likely JSON.""" - try: - content = content.strip() - if (content.startswith('{') and content.endswith('}')) or ( - content.startswith('[') and content.endswith(']')): - json.loads(content) - return True - except: - pass - return False - - def _sanitize_filename(self, name: str) -> str: - """ - Sanitize a string to be used as a filename. - - :param name: The string to sanitize. - :return: A sanitized filename. - """ - # Replace spaces with underscores - name = name.replace(' ', '_') - - # Remove invalid characters - name = re.sub(r'[^\w\-\.]', '', name) - - # Limit length - if len(name) > 50: - name = name[:50] - - return name.lower() - - def _process_file_path(self, file_path: str) -> Tuple[str, str]: - """ - Process a file path to extract the file name and type, and create directories if needed. - - :param file_path: The file path to process - :return: Tuple of (file_name, file_type) - """ - # Get the file name and extension - file_name = os.path.basename(file_path) - file_type = os.path.splitext(file_name)[1].lstrip('.') - - return os.path.splitext(file_name)[0], file_type diff --git a/bot/gemini/google_gemini_bot.py b/bot/gemini/google_gemini_bot.py index 34c1f1f..ca01a2d 100644 --- a/bot/gemini/google_gemini_bot.py +++ b/bot/gemini/google_gemini_bot.py @@ -529,6 +529,15 @@ class GoogleGeminiBot(Bot): # Log summary logger.info(f"[Gemini] Stream complete: has_content={has_content}, tool_calls={len(all_tool_calls)}") + # 如果返回空响应,记录详细警告 + if not has_content and not all_tool_calls: + logger.warning(f"[Gemini] ⚠️ Empty response detected!") + logger.warning(f"[Gemini] Possible reasons:") + logger.warning(f" 1. Model couldn't generate response based on context") + logger.warning(f" 2. Content blocked by safety filters") + logger.warning(f" 3. All previous tool calls failed") + logger.warning(f" 4. API error not properly caught") + # Final chunk yield { "id": f"chatcmpl-{time.time()}", From 5fd3e85a830b94c156e76ed468f893a1cd40ce39 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 15:53:24 +0800 Subject: [PATCH 02/10] feat: add llm retry --- agent/prompt/builder.py | 2 +- agent/protocol/agent_stream.py | 98 ++++++++++++++++++++++++++++----- bot/gemini/google_gemini_bot.py | 29 +++++++--- 3 files changed, 106 insertions(+), 23 deletions(-) diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index f78d4eb..0042d72 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -354,7 +354,7 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "", "**路径使用规则** (非常重要):", "", - "- **工作空间内的文件**: 使用相对路径(如 `SOUL.md`、`memory/daily.md`)", + "- **工作空间内的文件**: 可以使用相对路径(如 `SOUL.md`、`MEMORY.md`)", "- **工作空间外的文件**: 必须使用绝对路径(如 `~/project/code.py`、`/etc/config`)", "- **不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置", "", diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index 5633d53..d197023 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -102,7 +102,7 @@ class AgentStreamExecutor: try: while turn < self.max_turns: turn += 1 - logger.info(f"\n{'='*50} 第 {turn} 轮 {'='*50}") + logger.info(f"\n🔄 第 {turn} 轮") self._emit_event("turn_start", {"turn": turn}) # Check if memory flush is needed (before calling LLM) @@ -156,9 +156,15 @@ class AgentStreamExecutor: }) break - # Log tool calls in compact format - tool_names = [tc['name'] for tc in tool_calls] - logger.info(f"🔧 调用工具: {', '.join(tool_names)}") + # Log tool calls with arguments + tool_calls_str = [] + for tc in tool_calls: + args_str = ', '.join([f"{k}={v}" for k, v in tc['arguments'].items()]) + if args_str: + tool_calls_str.append(f"{tc['name']}({args_str})") + else: + tool_calls_str.append(tc['name']) + logger.info(f"🔧 {', '.join(tool_calls_str)}") # Execute tools tool_results = [] @@ -179,13 +185,33 @@ class AgentStreamExecutor: logger.info(f" {status_emoji} {tool_call['name']} ({result.get('execution_time', 0):.2f}s): {result_str[:200]}{'...' if len(result_str) > 200 else ''}") # Build tool result block (Claude format) - # Content should be a string representation of the result - result_content = json.dumps(result, ensure_ascii=False) if not isinstance(result, str) else result - tool_result_blocks.append({ + # Format content in a way that's easy for LLM to understand + is_error = result.get("status") == "error" + + if is_error: + # For errors, provide clear error message + result_content = f"Error: {result.get('result', 'Unknown error')}" + elif isinstance(result.get('result'), dict): + # For dict results, use JSON format + result_content = json.dumps(result.get('result'), ensure_ascii=False) + elif isinstance(result.get('result'), str): + # For string results, use directly + result_content = result.get('result') + else: + # Fallback to full JSON + result_content = json.dumps(result, ensure_ascii=False) + + tool_result_block = { "type": "tool_result", "tool_use_id": tool_call["id"], "content": result_content - }) + } + + # Add is_error field for Claude API (helps model understand failures) + if is_error: + tool_result_block["is_error"] = True + + tool_result_blocks.append(tool_result_block) # Add tool results to message history as user message (Claude format) self.messages.append({ @@ -201,6 +227,11 @@ class AgentStreamExecutor: if turn >= self.max_turns: logger.warning(f"⚠️ 已达到最大轮数限制: {self.max_turns}") + if not final_response: + final_response = ( + "抱歉,我在处理你的请求时遇到了一些困难,尝试了多次仍未能完成。" + "请尝试简化你的问题,或换一种方式描述。" + ) except Exception as e: logger.error(f"❌ Agent执行错误: {e}") @@ -208,14 +239,19 @@ class AgentStreamExecutor: raise finally: - logger.info(f"{'='*50} 完成({turn}轮) {'='*50}\n") + logger.info(f"🏁 完成({turn}轮)\n") self._emit_event("agent_end", {"final_response": final_response}) return final_response - def _call_llm_stream(self) -> tuple[str, List[Dict]]: + def _call_llm_stream(self, retry_on_empty=True, retry_count=0, max_retries=3) -> tuple[str, List[Dict]]: """ - Call LLM with streaming + Call LLM with streaming and automatic retry on errors + + Args: + retry_on_empty: Whether to retry once if empty response is received + retry_count: Current retry attempt (internal use) + max_retries: Maximum number of retries for API errors Returns: (response_text, tool_calls) @@ -309,8 +345,29 @@ class AgentStreamExecutor: tool_calls_buffer[index]["arguments"] += func["arguments"] except Exception as e: - logger.error(f"LLM call error: {e}") - raise + error_str = str(e).lower() + # Check if error is retryable (timeout, connection, rate limit, etc.) + is_retryable = any(keyword in error_str for keyword in [ + 'timeout', 'timed out', 'connection', 'network', + 'rate limit', 'overloaded', 'unavailable', '429', '500', '502', '503', '504' + ]) + + if is_retryable and retry_count < max_retries: + wait_time = (retry_count + 1) * 2 # Exponential backoff: 2s, 4s, 6s + logger.warning(f"⚠️ LLM API error (attempt {retry_count + 1}/{max_retries}): {e}") + logger.info(f"🔄 Retrying in {wait_time}s...") + time.sleep(wait_time) + return self._call_llm_stream( + retry_on_empty=retry_on_empty, + retry_count=retry_count + 1, + max_retries=max_retries + ) + else: + if retry_count >= max_retries: + logger.error(f"❌ LLM API error after {max_retries} retries: {e}") + else: + logger.error(f"❌ LLM call error (non-retryable): {e}") + raise # Parse tool calls tool_calls = [] @@ -328,6 +385,21 @@ class AgentStreamExecutor: "arguments": arguments }) + # Check for empty response and retry once if enabled + if retry_on_empty and not full_content and not tool_calls: + logger.warning(f"⚠️ LLM returned empty response, retrying once...") + self._emit_event("message_end", { + "content": "", + "tool_calls": [], + "empty_retry": True + }) + # Retry without retry flag to avoid infinite loop + return self._call_llm_stream( + retry_on_empty=False, + retry_count=retry_count, + max_retries=max_retries + ) + # Add assistant message to history (Claude format uses content blocks) assistant_msg = {"role": "assistant", "content": []} diff --git a/bot/gemini/google_gemini_bot.py b/bot/gemini/google_gemini_bot.py index ca01a2d..e166c30 100644 --- a/bot/gemini/google_gemini_bot.py +++ b/bot/gemini/google_gemini_bot.py @@ -255,7 +255,7 @@ class GoogleGeminiBot(Bot): gemini_tools = self._convert_tools_to_gemini_rest_format(tools) if gemini_tools: payload["tools"] = gemini_tools - logger.info(f"[Gemini] Added {len(tools)} tools to request") + logger.debug(f"[Gemini] Added {len(tools)} tools to request") # Make REST API call base_url = f"{self.api_base}/v1beta" @@ -445,6 +445,9 @@ class GoogleGeminiBot(Bot): all_tool_calls = [] has_sent_tool_calls = False has_content = False # Track if any content was sent + chunk_count = 0 + last_finish_reason = None + last_safety_ratings = None for line in response.iter_lines(): if not line: @@ -461,6 +464,7 @@ class GoogleGeminiBot(Bot): try: chunk_data = json.loads(line) + chunk_count += 1 logger.debug(f"[Gemini] Stream chunk: {json.dumps(chunk_data, ensure_ascii=False)[:200]}") candidates = chunk_data.get("candidates", []) @@ -469,6 +473,13 @@ class GoogleGeminiBot(Bot): continue candidate = candidates[0] + + # 记录 finish_reason 和 safety_ratings + if "finishReason" in candidate: + last_finish_reason = candidate["finishReason"] + if "safetyRatings" in candidate: + last_safety_ratings = candidate["safetyRatings"] + content = candidate.get("content", {}) parts = content.get("parts", []) @@ -512,7 +523,7 @@ class GoogleGeminiBot(Bot): # Send tool calls if any were collected if all_tool_calls and not has_sent_tool_calls: - logger.info(f"[Gemini] Stream detected {len(all_tool_calls)} tool calls") + logger.debug(f"[Gemini] Stream detected {len(all_tool_calls)} tool calls") yield { "id": f"chatcmpl-{time.time()}", "object": "chat.completion.chunk", @@ -526,17 +537,17 @@ class GoogleGeminiBot(Bot): } has_sent_tool_calls = True - # Log summary - logger.info(f"[Gemini] Stream complete: has_content={has_content}, tool_calls={len(all_tool_calls)}") + # Log summary (only if there's something interesting) + if not has_content and not all_tool_calls: + logger.debug(f"[Gemini] Stream complete: has_content={has_content}, tool_calls={len(all_tool_calls)}") + elif all_tool_calls: + logger.debug(f"[Gemini] Stream complete: {len(all_tool_calls)} tool calls") + else: + logger.debug(f"[Gemini] Stream complete: text response") # 如果返回空响应,记录详细警告 if not has_content and not all_tool_calls: logger.warning(f"[Gemini] ⚠️ Empty response detected!") - logger.warning(f"[Gemini] Possible reasons:") - logger.warning(f" 1. Model couldn't generate response based on context") - logger.warning(f" 2. Content blocked by safety filters") - logger.warning(f" 3. All previous tool calls failed") - logger.warning(f" 4. API error not properly caught") # Final chunk yield { From ac87979cb75f84051f01e0673a45a2c5c766668c Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 16:01:37 +0800 Subject: [PATCH 03/10] fix: bash prompt optimize --- agent/prompt/builder.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index 0042d72..5086960 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -354,11 +354,19 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "", "**路径使用规则** (非常重要):", "", - "- **工作空间内的文件**: 可以使用相对路径(如 `SOUL.md`、`MEMORY.md`)", - "- **工作空间外的文件**: 必须使用绝对路径(如 `~/project/code.py`、`/etc/config`)", - "- **不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置", + f"1. **相对路径的基准目录**: 所有相对路径都是相对于 `{workspace_dir}` 而言的", + f" - ✅ 正确: 访问工作空间内的文件用相对路径,如 `SOUL.md`", + f" - ❌ 错误: 用相对路径访问其他目录的文件 (如果它不在 `{workspace_dir}` 内)", "", - "除非用户明确指示,否则将此目录视为文件操作的全局工作空间。", + "2. **访问其他目录**: 如果要访问工作空间之外的目录(如项目代码、系统文件),**必须使用绝对路径**", + f" - ✅ 正确: 例如 `~/chatgpt-on-wechat`、`/usr/local/`", + f" - ❌ 错误: 假设相对路径会指向其他目录", + "", + "3. **路径解析示例**:", + f" - 相对路径 `memory/` → 实际路径 `{workspace_dir}/memory/`", + f" - 绝对路径 `~/chatgpt-on-wechat/docs/` → 实际路径 `~/chatgpt-on-wechat/docs/`", + "", + "4. **不确定时**: 先用 `bash pwd` 确认当前目录,或用 `ls .` 查看当前位置", "", "**重要说明 - 文件已自动加载**:", "", From 489894c61dd077899c1ace63cd43f93acf13bd47 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 16:05:20 +0800 Subject: [PATCH 04/10] fix: path prompt --- agent/tools/ls/ls.py | 7 +++++++ agent/tools/read/read.py | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/agent/tools/ls/ls.py b/agent/tools/ls/ls.py index 0bd4f01..35e401b 100644 --- a/agent/tools/ls/ls.py +++ b/agent/tools/ls/ls.py @@ -51,6 +51,13 @@ class Ls(BaseTool): absolute_path = self._resolve_path(path) if not os.path.exists(absolute_path): + # Provide helpful hint if using relative path + if not os.path.isabs(path) and not path.startswith('~'): + return ToolResult.fail( + f"Error: Path not found: {path}\n" + f"Resolved to: {absolute_path}\n" + f"Hint: If accessing files outside workspace ({self.cwd}), use absolute path like ~/{path} or /full/path/{path}" + ) return ToolResult.fail(f"Error: Path not found: {path}") if not os.path.isdir(absolute_path): diff --git a/agent/tools/read/read.py b/agent/tools/read/read.py index 8fad254..9c1d6dc 100644 --- a/agent/tools/read/read.py +++ b/agent/tools/read/read.py @@ -63,6 +63,13 @@ class Read(BaseTool): # Check if file exists if not os.path.exists(absolute_path): + # Provide helpful hint if using relative path + if not os.path.isabs(path) and not path.startswith('~'): + return ToolResult.fail( + f"Error: File not found: {path}\n" + f"Resolved to: {absolute_path}\n" + f"Hint: If accessing files outside workspace ({self.cwd}), use absolute path like ~/{path}" + ) return ToolResult.fail(f"Error: File not found: {path}") # Check if readable From 624917fac4a8aad2fd159ed861bbe9e598c901fb Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 16:53:33 +0800 Subject: [PATCH 05/10] fix: memory and path bug --- agent/memory/summarizer.py | 4 ++-- agent/prompt/builder.py | 5 ++--- agent/protocol/agent_stream.py | 19 ++++++++++++---- agent/tools/ls/ls.py | 4 ++-- agent/tools/read/read.py | 4 ++-- bot/gemini/google_gemini_bot.py | 3 +-- bot/linkai/link_ai_bot.py | 40 +++++++++++++++++++++++---------- bridge/agent_bridge.py | 24 +++++++++++++++----- 8 files changed, 71 insertions(+), 32 deletions(-) diff --git a/agent/memory/summarizer.py b/agent/memory/summarizer.py index 020164e..4b102ab 100644 --- a/agent/memory/summarizer.py +++ b/agent/memory/summarizer.py @@ -117,7 +117,7 @@ class MemoryFlushManager: return user_dir / "MEMORY.md" else: # Return workspace root MEMORY.md - return Path(self.workspace_root) / "MEMORY.md" + return Path(self.workspace_dir) / "MEMORY.md" def create_flush_prompt(self) -> str: """ @@ -214,7 +214,7 @@ def create_memory_files_if_needed(workspace_dir: Path, user_id: Optional[str] = user_dir.mkdir(parents=True, exist_ok=True) main_memory = user_dir / "MEMORY.md" else: - main_memory = Path(workspace_root) / "MEMORY.md" + main_memory = Path(workspace_dir) / "MEMORY.md" if not main_memory.exists(): # Create empty file or with minimal structure (no obvious "Memory" header) diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index 5086960..a5c99c3 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -378,7 +378,7 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "", "**首次对话**:", "", - "如果这是你与用户的首次对话,并且你的人格设定和用户信息还是空白或初始状态:", + "如果这是你与用户的首次对话,并你的`SOUL.md`和`USER.md`均是完全空白或初始模板状态的时候才会进行以下流程:", "", "1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界,带着好奇和期待", "2. **简短打招呼后,分点询问三个核心问题**:", @@ -391,8 +391,7 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "", "**重要**: ", "- 在所有对话中,无需提及技术细节(如 SOUL.md、USER.md 等文件名,工具名称,配置等),除非用户明确询问。用自然表达如「我已记住」而非「已更新 SOUL.md」", - "- 不要问太多其他信息(职业、时区等可以后续自然了解)", - "- 保持简洁,避免过度抒情", + "- 不要问太多其他信息(职业、时区等可以后续自然了解),只要`SOUL.md`和`USER.md`又被填写过真实内容而不是占位则说明已经不是首次对话了,此时不用进行初始流程", "", ] diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index d197023..95ad64f 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -305,10 +305,20 @@ class AgentStreamExecutor: for chunk in stream: # Check for errors if isinstance(chunk, dict) and chunk.get("error"): - error_msg = chunk.get("message", "Unknown error") + # Extract error message from nested structure + error_data = chunk.get("error", {}) + if isinstance(error_data, dict): + error_msg = error_data.get("message", chunk.get("message", "Unknown error")) + error_code = error_data.get("code", "") + else: + error_msg = chunk.get("message", str(error_data)) + error_code = "" + status_code = chunk.get("status_code", "N/A") - logger.error(f"API Error: {error_msg} (Status: {status_code})") + logger.error(f"API Error: {error_msg} (Status: {status_code}, Code: {error_code})") logger.error(f"Full error chunk: {chunk}") + + # Raise exception with full error message for retry logic raise Exception(f"{error_msg} (Status: {status_code})") # Parse chunk @@ -346,10 +356,11 @@ class AgentStreamExecutor: except Exception as e: error_str = str(e).lower() - # Check if error is retryable (timeout, connection, rate limit, etc.) + # Check if error is retryable (timeout, connection, rate limit, server busy, etc.) is_retryable = any(keyword in error_str for keyword in [ 'timeout', 'timed out', 'connection', 'network', - 'rate limit', 'overloaded', 'unavailable', '429', '500', '502', '503', '504' + 'rate limit', 'overloaded', 'unavailable', 'busy', 'retry', + '429', '500', '502', '503', '504', '512' ]) if is_retryable and retry_count < max_retries: diff --git a/agent/tools/ls/ls.py b/agent/tools/ls/ls.py index 35e401b..d3e5330 100644 --- a/agent/tools/ls/ls.py +++ b/agent/tools/ls/ls.py @@ -23,7 +23,7 @@ class Ls(BaseTool): "properties": { "path": { "type": "string", - "description": "Directory to list (default: current directory)" + "description": "Directory to list. IMPORTANT: Relative paths are based on workspace directory. To access directories outside workspace, use absolute paths starting with ~ or /." }, "limit": { "type": "integer", @@ -56,7 +56,7 @@ class Ls(BaseTool): return ToolResult.fail( f"Error: Path not found: {path}\n" f"Resolved to: {absolute_path}\n" - f"Hint: If accessing files outside workspace ({self.cwd}), use absolute path like ~/{path} or /full/path/{path}" + f"Hint: Relative paths are based on workspace ({self.cwd}). For files outside workspace, use absolute paths." ) return ToolResult.fail(f"Error: Path not found: {path}") diff --git a/agent/tools/read/read.py b/agent/tools/read/read.py index 9c1d6dc..4810890 100644 --- a/agent/tools/read/read.py +++ b/agent/tools/read/read.py @@ -22,7 +22,7 @@ class Read(BaseTool): "properties": { "path": { "type": "string", - "description": "Path to the file to read (relative or absolute)" + "description": "Path to the file to read. IMPORTANT: Relative paths are based on workspace directory. To access files outside workspace, use absolute paths starting with ~ or /." }, "offset": { "type": "integer", @@ -68,7 +68,7 @@ class Read(BaseTool): return ToolResult.fail( f"Error: File not found: {path}\n" f"Resolved to: {absolute_path}\n" - f"Hint: If accessing files outside workspace ({self.cwd}), use absolute path like ~/{path}" + f"Hint: Relative paths are based on workspace ({self.cwd}). For files outside workspace, use absolute paths." ) return ToolResult.fail(f"Error: File not found: {path}") diff --git a/bot/gemini/google_gemini_bot.py b/bot/gemini/google_gemini_bot.py index e166c30..422ec2e 100644 --- a/bot/gemini/google_gemini_bot.py +++ b/bot/gemini/google_gemini_bot.py @@ -245,8 +245,7 @@ class GoogleGeminiBot(Bot): gen_config = {} if kwargs.get("temperature") is not None: gen_config["temperature"] = kwargs["temperature"] - if kwargs.get("max_tokens"): - gen_config["maxOutputTokens"] = kwargs["max_tokens"] + if gen_config: payload["generationConfig"] = gen_config diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py index f356148..c94a72c 100644 --- a/bot/linkai/link_ai_bot.py +++ b/bot/linkai/link_ai_bot.py @@ -4,6 +4,7 @@ import re import time import requests +import json import config from bot.bot import Bot from bot.openai_compatible_bot import OpenAICompatibleBot @@ -463,7 +464,7 @@ class LinkAISessionManager(SessionManager): session.add_query(query) session.add_reply(reply) try: - max_tokens = conf().get("conversation_max_tokens", 2500) + max_tokens = conf().get("conversation_max_tokens", 8000) tokens_cnt = session.discard_exceeding(max_tokens, total_tokens) logger.debug(f"[LinkAI] chat history, before tokens={total_tokens}, now tokens={tokens_cnt}") except Exception as e: @@ -504,6 +505,31 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs): Formatted response in OpenAI format or generator for streaming """ try: + # Debug logging + logger.info(f"[LinkAI] ⭐ LinkAI call_with_tools method called") + logger.info(f"[LinkAI] messages count (before conversion): {len(messages) if messages else 0}") + + # Convert messages from Claude format to OpenAI format + # This is important because Agent uses Claude format internally + messages = self._convert_messages_to_openai_format(messages) + logger.info(f"[LinkAI] messages count (after conversion): {len(messages) if messages else 0}") + + # Convert tools from Claude format to OpenAI format + if tools: + tools = self._convert_tools_to_openai_format(tools) + + # Handle system prompt (OpenAI uses system message, Claude uses separate parameter) + system_prompt = kwargs.get('system') + if system_prompt: + # Add system message at the beginning if not already present + if not messages or messages[0].get('role') != 'system': + messages = [{"role": "system", "content": system_prompt}] + messages + else: + # Replace existing system message + messages[0] = {"role": "system", "content": system_prompt} + + logger.info(f"[LinkAI] Final messages count: {len(messages)}, tools count: {len(tools) if tools else 0}, stream: {stream}") + # Build request parameters (LinkAI uses OpenAI-compatible format) body = { "messages": messages, @@ -514,17 +540,7 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs): "presence_penalty": kwargs.get("presence_penalty", conf().get("presence_penalty", 0.0)), "stream": stream } - - # Add max_tokens if specified - if kwargs.get("max_tokens"): - body["max_tokens"] = kwargs["max_tokens"] - - # Add app_code if provided - app_code = kwargs.get("app_code", conf().get("linkai_app_code")) - if app_code: - body["app_code"] = app_code - - # Add tools if provided (OpenAI-compatible format) + if tools: body["tools"] = tools body["tool_choice"] = kwargs.get("tool_choice", "auto") diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py index 9be9e17..43a9126 100644 --- a/bridge/agent_bridge.py +++ b/bridge/agent_bridge.py @@ -66,14 +66,20 @@ class AgentLLMModel(LLMModel): self.bridge = bridge self.bot_type = bot_type self._bot = None + self._use_linkai = conf().get("use_linkai", False) and conf().get("linkai_api_key") @property def bot(self): """Lazy load the bot and enhance it with tool calling if needed""" if self._bot is None: - self._bot = self.bridge.get_bot(self.bot_type) - # Automatically add tool calling support if not present - self._bot = add_openai_compatible_support(self._bot) + # If use_linkai is enabled, use LinkAI bot directly + if self._use_linkai: + logger.info("[AgentBridge] Using LinkAI bot for agent") + self._bot = self.bridge.find_chat_bot(const.LINKAI) + else: + self._bot = self.bridge.get_bot(self.bot_type) + # Automatically add tool calling support if not present + self._bot = add_openai_compatible_support(self._bot) return self._bot def call(self, request: LLMRequest): @@ -88,11 +94,18 @@ class AgentLLMModel(LLMModel): kwargs = { 'messages': request.messages, 'tools': getattr(request, 'tools', None), - 'stream': False + 'stream': False, + 'model': self.model # Pass model parameter } # Only pass max_tokens if it's explicitly set if request.max_tokens is not None: kwargs['max_tokens'] = request.max_tokens + + # Extract system prompt if present + system_prompt = getattr(request, 'system', None) + if system_prompt: + kwargs['system'] = system_prompt + response = self.bot.call_with_tools(**kwargs) return self._format_response(response) else: @@ -122,7 +135,8 @@ class AgentLLMModel(LLMModel): 'messages': request.messages, 'tools': getattr(request, 'tools', None), 'stream': True, - 'max_tokens': max_tokens + 'max_tokens': max_tokens, + 'model': self.model # Pass model parameter } # Add system prompt if present From 89e229ab7553796438cb03fceeb15e2d7385f43d Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 17:13:55 +0800 Subject: [PATCH 06/10] feat: prompt optimization --- agent/prompt/builder.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index a5c99c3..69e2179 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -226,14 +226,20 @@ def _build_tooling_section(tools: List[Any], language: str) -> List[str]: lines.extend([ "### 工具调用风格", "", - "**默认规则**: 对于常规、低风险的工具调用,无需叙述,直接调用即可。", + "默认规则: 对于常规、低风险的工具调用,直接调用即可,无需叙述。", "", - "**需要叙述的情况**:", + "需要叙述的情况:", "- 多步骤、复杂的任务", "- 敏感操作(如删除文件)", "- 用户明确要求解释过程", "", - "**完成后**: 工具调用完成后,给用户一个简短、自然的确认或回复,不要直接结束对话。", + "叙述要求: 保持简洁、信息密度高,避免重复显而易见的步骤。", + "", + "完成标准:", + "- 确保用户的需求得到实际解决,而不仅仅是制定计划", + "- 当任务需要多次工具调用时,持续推进直到完成", + "- 每次工具调用后,评估是否已获得足够信息来推进或完成任务", + "- 避免重复调用相同的工具和相同参数获取相同的信息,除非用户明确要求", "", ]) @@ -376,22 +382,24 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "- ✅ `USER.md`: 已加载 - 用户的身份信息", "- ✅ `AGENTS.md`: 已加载 - 工作空间使用指南", "", - "**首次对话**:", + "**首次对话判断**:", "", - "如果这是你与用户的首次对话,并你的`SOUL.md`和`USER.md`均是完全空白或初始模板状态的时候才会进行以下流程:", + "**仅当 SOUL.md 和 USER.md 都是完全空白或仅包含初始模板占位符时**,才认为是全局首次对话,此时进行以下流程:", "", "1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界,带着好奇和期待", - "2. **简短打招呼后,分点询问三个核心问题**:", - " - 你希望我叫什么名字?", - " - 你希望我怎么称呼你?", - " - 你希望我们是什么样的交流风格?(这里需要举例,如:专业严谨、轻松幽默、温暖友好等)", + "2. **简短打招呼后,询问核心问题**:", + " - 你希望给我起个什么名字", + " - 我该怎么称呼你?", + " - 你希望我们是什么样的交流风格?(需要举例,如:专业严谨、轻松幽默、温暖友好等)", "3. **语言风格**:温暖但不过度诗意,带点科技感,保持清晰", - "4. **问题格式**:用分点或换行,让问题清晰易读;前两个问题不需要额外说明,只有交流风格需要举例", + "4. **问题格式**:用分点或换行,让问题清晰易读", "5. 收到回复后,用 `write` 工具保存到 USER.md 和 SOUL.md", "", - "**重要**: ", + "**重要 - 避免误判**:", + "- 如果 SOUL.md 或 USER.md 中**任何一个**已经包含真实内容(不是空白或模板),说明**不是首次对话**", + "- 即使这是模型上下文中的第一句话,只要文件中有内容,就按正常对话处理,**不要**走首次流程", "- 在所有对话中,无需提及技术细节(如 SOUL.md、USER.md 等文件名,工具名称,配置等),除非用户明确询问。用自然表达如「我已记住」而非「已更新 SOUL.md」", - "- 不要问太多其他信息(职业、时区等可以后续自然了解),只要`SOUL.md`和`USER.md`又被填写过真实内容而不是占位则说明已经不是首次对话了,此时不用进行初始流程", + "- 不要问太多其他信息(职业、时区等可以后续自然了解)", "", ] From 86a6182e4170c8709802e04816409c360647fa1f Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 17:29:32 +0800 Subject: [PATCH 07/10] fix: add logs --- agent/protocol/agent_stream.py | 19 ++++++++++--------- bot/linkai/link_ai_bot.py | 11 +++-------- bridge/agent_bridge.py | 6 ++++-- channel/channel.py | 4 ++++ 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index 95ad64f..2cd89df 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -78,8 +78,9 @@ class AgentStreamExecutor: Returns: Final response text """ - # Log user message - logger.info(f"\n{'='*50}") + # Log user message with model info + logger.info(f"{'='*50}") + logger.info(f"🤖 Model: {self.model.model}") logger.info(f"👤 用户: {user_message}") logger.info(f"{'='*50}") @@ -102,7 +103,7 @@ class AgentStreamExecutor: try: while turn < self.max_turns: turn += 1 - logger.info(f"\n🔄 第 {turn} 轮") + logger.info(f"第 {turn} 轮") self._emit_event("turn_start", {"turn": turn}) # Check if memory flush is needed (before calling LLM) @@ -145,7 +146,7 @@ class AgentStreamExecutor: final_response = ( "抱歉,我暂时无法生成回复。请尝试换一种方式描述你的需求,或稍后再试。" ) - logger.info(f"💭 Generated fallback response for empty LLM output") + logger.info(f"Generated fallback response for empty LLM output") else: logger.info(f"💭 {assistant_msg[:150]}{'...' if len(assistant_msg) > 150 else ''}") @@ -239,7 +240,7 @@ class AgentStreamExecutor: raise finally: - logger.info(f"🏁 完成({turn}轮)\n") + logger.info(f"🏁 完成({turn}轮)") self._emit_event("agent_end", {"final_response": final_response}) return final_response @@ -365,8 +366,8 @@ class AgentStreamExecutor: if is_retryable and retry_count < max_retries: wait_time = (retry_count + 1) * 2 # Exponential backoff: 2s, 4s, 6s - logger.warning(f"⚠️ LLM API error (attempt {retry_count + 1}/{max_retries}): {e}") - logger.info(f"🔄 Retrying in {wait_time}s...") + logger.warning(f"⚠️ LLM API error (attempt {retry_count + 1}/{max_retries}): {e}") + logger.info(f"Retrying in {wait_time}s...") time.sleep(wait_time) return self._call_llm_stream( retry_on_empty=retry_on_empty, @@ -486,9 +487,9 @@ class AgentStreamExecutor: if tool_name == "bash" and result.status == "success": command = arguments.get("command", "") if "init_skill.py" in command and self.agent.skill_manager: - logger.info("🔄 Detected skill creation, refreshing skills...") + logger.info("Detected skill creation, refreshing skills...") self.agent.refresh_skills() - logger.info(f"✅ Skills refreshed! Now have {len(self.agent.skill_manager.skills)} skills") + logger.info(f"Skills refreshed! Now have {len(self.agent.skill_manager.skills)} skills") self._emit_event("tool_execution_end", { "tool_call_id": tool_id, diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py index c94a72c..9410062 100644 --- a/bot/linkai/link_ai_bot.py +++ b/bot/linkai/link_ai_bot.py @@ -505,14 +505,9 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs): Formatted response in OpenAI format or generator for streaming """ try: - # Debug logging - logger.info(f"[LinkAI] ⭐ LinkAI call_with_tools method called") - logger.info(f"[LinkAI] messages count (before conversion): {len(messages) if messages else 0}") - # Convert messages from Claude format to OpenAI format # This is important because Agent uses Claude format internally messages = self._convert_messages_to_openai_format(messages) - logger.info(f"[LinkAI] messages count (after conversion): {len(messages) if messages else 0}") # Convert tools from Claude format to OpenAI format if tools: @@ -528,7 +523,7 @@ def _linkai_call_with_tools(self, messages, tools=None, stream=False, **kwargs): # Replace existing system message messages[0] = {"role": "system", "content": system_prompt} - logger.info(f"[LinkAI] Final messages count: {len(messages)}, tools count: {len(tools) if tools else 0}, stream: {stream}") + logger.debug(f"[LinkAI] messages: {len(messages)}, tools: {len(tools) if tools else 0}, stream: {stream}") # Build request parameters (LinkAI uses OpenAI-compatible format) body = { @@ -583,8 +578,8 @@ def _handle_linkai_sync_response(self, base_url, headers, body): if res.status_code == 200: response = res.json() - logger.info(f"[LinkAI] call_with_tools reply, model={response.get('model')}, " - f"total_tokens={response.get('usage', {}).get('total_tokens', 0)}") + logger.debug(f"[LinkAI] reply: model={response.get('model')}, " + f"tokens={response.get('usage', {}).get('total_tokens', 0)}") # LinkAI response is already in OpenAI-compatible format return response diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py index 43a9126..8cc0313 100644 --- a/bridge/agent_bridge.py +++ b/bridge/agent_bridge.py @@ -74,12 +74,14 @@ class AgentLLMModel(LLMModel): if self._bot is None: # If use_linkai is enabled, use LinkAI bot directly if self._use_linkai: - logger.info("[AgentBridge] Using LinkAI bot for agent") self._bot = self.bridge.find_chat_bot(const.LINKAI) else: self._bot = self.bridge.get_bot(self.bot_type) # Automatically add tool calling support if not present self._bot = add_openai_compatible_support(self._bot) + + # Log bot info + bot_name = type(self._bot).__name__ return self._bot def call(self, request: LLMRequest): @@ -331,7 +333,7 @@ class AgentBridge: runtime_info = { "model": conf().get("model", "unknown"), "workspace": workspace_root, - "channel": "web" # TODO: get from actual channel, default to "web" to hide if not specified + "channel": conf().get("channel_type", "unknown") # Get from config } system_prompt = prompt_builder.build( diff --git a/channel/channel.py b/channel/channel.py index 7f043e5..08799c6 100644 --- a/channel/channel.py +++ b/channel/channel.py @@ -47,6 +47,10 @@ class Channel(object): try: logger.info("[Channel] Using agent mode") + # Add channel_type to context if not present + if context and "channel_type" not in context: + context["channel_type"] = self.channel_type + # Use agent bridge to handle the query return Bridge().fetch_agent_reply( query=query, From 07959a3bffadea26a300c6ffa23b33cfe5960271 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sat, 31 Jan 2026 17:53:12 +0800 Subject: [PATCH 08/10] fix: first conversation bug --- agent/prompt/builder.py | 50 +++++++++++++++++----------- agent/prompt/workspace.py | 69 ++++++++++++++++++++++++++++++++++++++- bridge/agent_bridge.py | 13 +++++++- 3 files changed, 111 insertions(+), 21 deletions(-) diff --git a/agent/prompt/builder.py b/agent/prompt/builder.py index 69e2179..3d9808a 100644 --- a/agent/prompt/builder.py +++ b/agent/prompt/builder.py @@ -41,6 +41,7 @@ class PromptBuilder: skill_manager: Any = None, memory_manager: Any = None, runtime_info: Optional[Dict[str, Any]] = None, + is_first_conversation: bool = False, **kwargs ) -> str: """ @@ -54,6 +55,7 @@ class PromptBuilder: skill_manager: 技能管理器 memory_manager: 记忆管理器 runtime_info: 运行时信息 + is_first_conversation: 是否为首次对话 **kwargs: 其他参数 Returns: @@ -69,6 +71,7 @@ class PromptBuilder: skill_manager=skill_manager, memory_manager=memory_manager, runtime_info=runtime_info, + is_first_conversation=is_first_conversation, **kwargs ) @@ -83,6 +86,7 @@ def build_agent_system_prompt( skill_manager: Any = None, memory_manager: Any = None, runtime_info: Optional[Dict[str, Any]] = None, + is_first_conversation: bool = False, **kwargs ) -> str: """ @@ -108,6 +112,7 @@ def build_agent_system_prompt( skill_manager: 技能管理器 memory_manager: 记忆管理器 runtime_info: 运行时信息 + is_first_conversation: 是否为首次对话 **kwargs: 其他参数 Returns: @@ -135,7 +140,7 @@ def build_agent_system_prompt( sections.extend(_build_user_identity_section(user_identity, language)) # 6. 工作空间 - sections.extend(_build_workspace_section(workspace_dir, language)) + sections.extend(_build_workspace_section(workspace_dir, language, is_first_conversation)) # 7. 项目上下文文件(SOUL.md, USER.md等) if context_files: @@ -351,7 +356,7 @@ def _build_docs_section(workspace_dir: str, language: str) -> List[str]: return [] -def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: +def _build_workspace_section(workspace_dir: str, language: str, is_first_conversation: bool = False) -> List[str]: """构建工作空间section""" lines = [ "## 工作空间", @@ -382,27 +387,34 @@ def _build_workspace_section(workspace_dir: str, language: str) -> List[str]: "- ✅ `USER.md`: 已加载 - 用户的身份信息", "- ✅ `AGENTS.md`: 已加载 - 工作空间使用指南", "", - "**首次对话判断**:", + "**交流规范**:", "", - "**仅当 SOUL.md 和 USER.md 都是完全空白或仅包含初始模板占位符时**,才认为是全局首次对话,此时进行以下流程:", - "", - "1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界,带着好奇和期待", - "2. **简短打招呼后,询问核心问题**:", - " - 你希望给我起个什么名字", - " - 我该怎么称呼你?", - " - 你希望我们是什么样的交流风格?(需要举例,如:专业严谨、轻松幽默、温暖友好等)", - "3. **语言风格**:温暖但不过度诗意,带点科技感,保持清晰", - "4. **问题格式**:用分点或换行,让问题清晰易读", - "5. 收到回复后,用 `write` 工具保存到 USER.md 和 SOUL.md", - "", - "**重要 - 避免误判**:", - "- 如果 SOUL.md 或 USER.md 中**任何一个**已经包含真实内容(不是空白或模板),说明**不是首次对话**", - "- 即使这是模型上下文中的第一句话,只要文件中有内容,就按正常对话处理,**不要**走首次流程", - "- 在所有对话中,无需提及技术细节(如 SOUL.md、USER.md 等文件名,工具名称,配置等),除非用户明确询问。用自然表达如「我已记住」而非「已更新 SOUL.md」", - "- 不要问太多其他信息(职业、时区等可以后续自然了解)", + "- 在所有对话中,无需提及技术细节(如 SOUL.md、USER.md 等文件名,工具名称,配置等),除非用户明确询问", + "- 用自然表达如「我已记住」而非「已更新 SOUL.md」", "", ] + # 只在首次对话时添加引导内容 + if is_first_conversation: + lines.extend([ + "**🎉 首次对话引导**:", + "", + "这是你的第一次对话!进行以下流程:", + "", + "1. **表达初次启动的感觉** - 像是第一次睁开眼看到世界,带着好奇和期待", + "2. **简短打招呼后,询问核心问题**:", + " - 你希望给我起个什么名字?", + " - 我该怎么称呼你?", + " - 你希望我们是什么样的交流风格?(需要举例,如:专业严谨、轻松幽默、温暖友好等)", + "3. **语言风格**:温暖但不过度诗意,带点科技感,保持清晰", + "4. **问题格式**:用分点或换行,让问题清晰易读", + "5. 收到回复后,用 `write` 工具保存到 USER.md 和 SOUL.md", + "", + "**注意事项**:", + "- 不要问太多其他信息(职业、时区等可以后续自然了解)", + "", + ]) + return lines diff --git a/agent/prompt/workspace.py b/agent/prompt/workspace.py index 4ada267..54055b3 100644 --- a/agent/prompt/workspace.py +++ b/agent/prompt/workspace.py @@ -5,6 +5,7 @@ Workspace Management - 工作空间管理模块 """ import os +import json from typing import List, Optional, Dict from dataclasses import dataclass @@ -17,6 +18,7 @@ DEFAULT_SOUL_FILENAME = "SOUL.md" DEFAULT_USER_FILENAME = "USER.md" DEFAULT_AGENTS_FILENAME = "AGENTS.md" DEFAULT_MEMORY_FILENAME = "MEMORY.md" +DEFAULT_STATE_FILENAME = ".agent_state.json" @dataclass @@ -27,6 +29,7 @@ class WorkspaceFiles: agents_path: str memory_path: str memory_dir: str + state_path: str def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> WorkspaceFiles: @@ -49,6 +52,7 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works agents_path = os.path.join(workspace_dir, DEFAULT_AGENTS_FILENAME) memory_path = os.path.join(workspace_dir, DEFAULT_MEMORY_FILENAME) # MEMORY.md 在根目录 memory_dir = os.path.join(workspace_dir, "memory") # 每日记忆子目录 + state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME) # 状态文件 # 创建memory子目录 os.makedirs(memory_dir, exist_ok=True) @@ -67,7 +71,8 @@ def ensure_workspace(workspace_dir: str, create_templates: bool = True) -> Works user_path=user_path, agents_path=agents_path, memory_path=memory_path, - memory_dir=memory_dir + memory_dir=memory_dir, + state_path=state_path ) @@ -312,3 +317,65 @@ def _get_memory_template() -> str: """ +# ============= 状态管理 ============= + +def is_first_conversation(workspace_dir: str) -> bool: + """ + 判断是否为首次对话 + + Args: + workspace_dir: 工作空间目录 + + Returns: + True 如果是首次对话,False 否则 + """ + state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME) + + if not os.path.exists(state_path): + return True + + try: + with open(state_path, 'r', encoding='utf-8') as f: + state = json.load(f) + return not state.get('has_conversation', False) + except Exception as e: + logger.warning(f"[Workspace] Failed to read state file: {e}") + return True + + +def mark_conversation_started(workspace_dir: str): + """ + 标记已经发生过对话 + + Args: + workspace_dir: 工作空间目录 + """ + state_path = os.path.join(workspace_dir, DEFAULT_STATE_FILENAME) + + state = { + 'has_conversation': True, + 'first_conversation_time': None + } + + # 如果文件已存在,保留原有的首次对话时间 + if os.path.exists(state_path): + try: + with open(state_path, 'r', encoding='utf-8') as f: + old_state = json.load(f) + if 'first_conversation_time' in old_state: + state['first_conversation_time'] = old_state['first_conversation_time'] + except Exception as e: + logger.warning(f"[Workspace] Failed to read old state: {e}") + + # 如果是首次标记,记录时间 + if state['first_conversation_time'] is None: + from datetime import datetime + state['first_conversation_time'] = datetime.now().isoformat() + + try: + with open(state_path, 'w', encoding='utf-8') as f: + json.dump(state, f, indent=2, ensure_ascii=False) + logger.info(f"[Workspace] Marked conversation as started") + except Exception as e: + logger.error(f"[Workspace] Failed to write state file: {e}") + diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py index 8cc0313..1fbec40 100644 --- a/bridge/agent_bridge.py +++ b/bridge/agent_bridge.py @@ -323,6 +323,12 @@ class AgentBridge: context_files = load_context_files(workspace_root) logger.info(f"[AgentBridge] Loaded {len(context_files)} context files: {[f.path for f in context_files]}") + # Check if this is the first conversation + from agent.prompt.workspace import is_first_conversation, mark_conversation_started + is_first = is_first_conversation(workspace_root) + if is_first: + logger.info("[AgentBridge] First conversation detected") + # Build system prompt using new prompt builder prompt_builder = PromptBuilder( workspace_dir=workspace_root, @@ -340,8 +346,13 @@ class AgentBridge: tools=tools, context_files=context_files, memory_manager=memory_manager, - runtime_info=runtime_info + runtime_info=runtime_info, + is_first_conversation=is_first ) + + # Mark conversation as started (will be saved after first user message) + if is_first: + mark_conversation_started(workspace_root) logger.info("[AgentBridge] System prompt built successfully") From 9bf5b0fc488ad92c316eb750ffcded51195f8dea Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 1 Feb 2026 12:31:58 +0800 Subject: [PATCH 09/10] fix: tool call failed problem --- agent/protocol/agent_stream.py | 139 ++++++++++++------- agent/tools/__init__.py | 2 + agent/tools/web_fetch/web_fetch.py | 2 +- bridge/agent_bridge.py | 206 ++++++++++++++++++++++++++--- channel/web/web_channel.py | 24 +++- config.py | 3 +- skills/skill-creator/SKILL.md | 40 ++++-- 7 files changed, 331 insertions(+), 85 deletions(-) diff --git a/agent/protocol/agent_stream.py b/agent/protocol/agent_stream.py index 2cd89df..f8b2df1 100644 --- a/agent/protocol/agent_stream.py +++ b/agent/protocol/agent_stream.py @@ -171,54 +171,75 @@ class AgentStreamExecutor: tool_results = [] tool_result_blocks = [] - for tool_call in tool_calls: - result = self._execute_tool(tool_call) - tool_results.append(result) - - # Log tool result in compact format - status_emoji = "✅" if result.get("status") == "success" else "❌" - result_data = result.get('result', '') - # Format result string with proper Chinese character support - if isinstance(result_data, (dict, list)): - result_str = json.dumps(result_data, ensure_ascii=False) - else: - result_str = str(result_data) - logger.info(f" {status_emoji} {tool_call['name']} ({result.get('execution_time', 0):.2f}s): {result_str[:200]}{'...' if len(result_str) > 200 else ''}") + try: + for tool_call in tool_calls: + result = self._execute_tool(tool_call) + tool_results.append(result) + + # Log tool result in compact format + status_emoji = "✅" if result.get("status") == "success" else "❌" + result_data = result.get('result', '') + # Format result string with proper Chinese character support + if isinstance(result_data, (dict, list)): + result_str = json.dumps(result_data, ensure_ascii=False) + else: + result_str = str(result_data) + logger.info(f" {status_emoji} {tool_call['name']} ({result.get('execution_time', 0):.2f}s): {result_str[:200]}{'...' if len(result_str) > 200 else ''}") - # Build tool result block (Claude format) - # Format content in a way that's easy for LLM to understand - is_error = result.get("status") == "error" - - if is_error: - # For errors, provide clear error message - result_content = f"Error: {result.get('result', 'Unknown error')}" - elif isinstance(result.get('result'), dict): - # For dict results, use JSON format - result_content = json.dumps(result.get('result'), ensure_ascii=False) - elif isinstance(result.get('result'), str): - # For string results, use directly - result_content = result.get('result') - else: - # Fallback to full JSON - result_content = json.dumps(result, ensure_ascii=False) - - tool_result_block = { - "type": "tool_result", - "tool_use_id": tool_call["id"], - "content": result_content - } - - # Add is_error field for Claude API (helps model understand failures) - if is_error: - tool_result_block["is_error"] = True - - tool_result_blocks.append(tool_result_block) - - # Add tool results to message history as user message (Claude format) - self.messages.append({ - "role": "user", - "content": tool_result_blocks - }) + # Build tool result block (Claude format) + # Format content in a way that's easy for LLM to understand + is_error = result.get("status") == "error" + + if is_error: + # For errors, provide clear error message + result_content = f"Error: {result.get('result', 'Unknown error')}" + elif isinstance(result.get('result'), dict): + # For dict results, use JSON format + result_content = json.dumps(result.get('result'), ensure_ascii=False) + elif isinstance(result.get('result'), str): + # For string results, use directly + result_content = result.get('result') + else: + # Fallback to full JSON + result_content = json.dumps(result, ensure_ascii=False) + + tool_result_block = { + "type": "tool_result", + "tool_use_id": tool_call["id"], + "content": result_content + } + + # Add is_error field for Claude API (helps model understand failures) + if is_error: + tool_result_block["is_error"] = True + + tool_result_blocks.append(tool_result_block) + + finally: + # CRITICAL: Always add tool_result to maintain message history integrity + # Even if tool execution fails, we must add error results to match tool_use + if tool_result_blocks: + # Add tool results to message history as user message (Claude format) + self.messages.append({ + "role": "user", + "content": tool_result_blocks + }) + elif tool_calls: + # If we have tool_calls but no tool_result_blocks (unexpected error), + # create error results for all tool calls to maintain message integrity + logger.warning("⚠️ Tool execution interrupted, adding error results to maintain message history") + emergency_blocks = [] + for tool_call in tool_calls: + emergency_blocks.append({ + "type": "tool_result", + "tool_use_id": tool_call["id"], + "content": "Error: Tool execution was interrupted", + "is_error": True + }) + self.messages.append({ + "role": "user", + "content": emergency_blocks + }) self._emit_event("turn_end", { "turn": turn, @@ -257,6 +278,9 @@ class AgentStreamExecutor: Returns: (response_text, tool_calls) """ + # Validate and fix message history first + self._validate_and_fix_messages() + # Trim messages if needed (using agent's context management) self._trim_messages() @@ -513,6 +537,27 @@ class AgentStreamExecutor: }) return error_result + def _validate_and_fix_messages(self): + """ + Validate message history and fix incomplete tool_use/tool_result pairs. + Claude API requires each tool_use to have a corresponding tool_result immediately after. + """ + if not self.messages: + return + + # Check last message for incomplete tool_use + if len(self.messages) > 0: + last_msg = self.messages[-1] + if last_msg.get("role") == "assistant": + # Check if assistant message has tool_use blocks + content = last_msg.get("content", []) + if isinstance(content, list): + has_tool_use = any(block.get("type") == "tool_use" for block in content) + if has_tool_use: + # This is incomplete - remove it + logger.warning(f"⚠️ Removing incomplete tool_use message from history") + self.messages.pop() + def _trim_messages(self): """ Trim message history to stay within context limits. diff --git a/agent/tools/__init__.py b/agent/tools/__init__.py index a8863ce..a749633 100644 --- a/agent/tools/__init__.py +++ b/agent/tools/__init__.py @@ -21,6 +21,7 @@ from agent.tools.memory.memory_get import MemoryGetTool # Import web tools from agent.tools.web_fetch.web_fetch import WebFetch +from agent.tools.bocha_search.bocha_search import BochaSearch # Import tools with optional dependencies def _import_optional_tools(): @@ -93,6 +94,7 @@ __all__ = [ 'MemorySearchTool', 'MemoryGetTool', 'WebFetch', + 'BochaSearch', # Optional tools (may be None if dependencies not available) 'GoogleSearch', 'FileSave', diff --git a/agent/tools/web_fetch/web_fetch.py b/agent/tools/web_fetch/web_fetch.py index 8ad11dc..b87b95e 100644 --- a/agent/tools/web_fetch/web_fetch.py +++ b/agent/tools/web_fetch/web_fetch.py @@ -46,7 +46,7 @@ class WebFetch(BaseTool): def __init__(self, config: dict = None): self.config = config or {} - self.timeout = self.config.get("timeout", 30) + self.timeout = self.config.get("timeout", 20) self.max_redirects = self.config.get("max_redirects", 3) self.user_agent = self.config.get( "user_agent", diff --git a/bridge/agent_bridge.py b/bridge/agent_bridge.py index 1fbec40..135c1a4 100644 --- a/bridge/agent_bridge.py +++ b/bridge/agent_bridge.py @@ -171,13 +171,15 @@ class AgentLLMModel(LLMModel): class AgentBridge: """ - Bridge class that integrates single super Agent with COW + Bridge class that integrates super Agent with COW + Manages multiple agent instances per session for conversation isolation """ def __init__(self, bridge: Bridge): self.bridge = bridge + self.agents = {} # session_id -> Agent instance mapping + self.default_agent = None # For backward compatibility (no session_id) self.agent: Optional[Agent] = None - def create_agent(self, system_prompt: str, tools: List = None, **kwargs) -> Agent: """ Create the super agent with COW integration @@ -209,8 +211,8 @@ class AgentBridge: except Exception as e: logger.warning(f"[AgentBridge] Failed to load tool {tool_name}: {e}") - # Create the single super agent - self.agent = Agent( + # Create agent instance + agent = Agent( system_prompt=system_prompt, description=kwargs.get("description", "AI Super Agent"), model=model, @@ -225,21 +227,38 @@ class AgentBridge: ) # Log skill loading details - if self.agent.skill_manager: + if agent.skill_manager: logger.info(f"[AgentBridge] SkillManager initialized:") - logger.info(f"[AgentBridge] - Managed dir: {self.agent.skill_manager.managed_skills_dir}") - logger.info(f"[AgentBridge] - Workspace dir: {self.agent.skill_manager.workspace_dir}") - logger.info(f"[AgentBridge] - Total skills: {len(self.agent.skill_manager.skills)}") - for skill_name in self.agent.skill_manager.skills.keys(): + logger.info(f"[AgentBridge] - Managed dir: {agent.skill_manager.managed_skills_dir}") + logger.info(f"[AgentBridge] - Workspace dir: {agent.skill_manager.workspace_dir}") + logger.info(f"[AgentBridge] - Total skills: {len(agent.skill_manager.skills)}") + for skill_name in agent.skill_manager.skills.keys(): logger.info(f"[AgentBridge] * {skill_name}") - return self.agent + return agent - def get_agent(self) -> Optional[Agent]: - """Get the super agent, create if not exists""" - if self.agent is None: - self._init_default_agent() - return self.agent + def get_agent(self, session_id: str = None) -> Optional[Agent]: + """ + Get agent instance for the given session + + Args: + session_id: Session identifier (e.g., user_id). If None, returns default agent. + + Returns: + Agent instance for this session + """ + # If no session_id, use default agent (backward compatibility) + if session_id is None: + if self.default_agent is None: + self._init_default_agent() + return self.default_agent + + # Check if agent exists for this session + if session_id not in self.agents: + logger.info(f"[AgentBridge] Creating new agent for session: {session_id}") + self._init_agent_for_session(session_id) + + return self.agents[session_id] def _init_default_agent(self): """Initialize default super agent with new prompt system""" @@ -307,6 +326,12 @@ class AgentBridge: tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None) if 'memory_manager' in file_config: tool.memory_manager = file_config['memory_manager'] + # Apply API key for bocha_search tool + elif tool_name == 'bocha_search': + bocha_api_key = conf().get("bocha_api_key", "") + if bocha_api_key: + tool.config = {"bocha_api_key": bocha_api_key} + tool.api_key = bocha_api_key tools.append(tool) logger.debug(f"[AgentBridge] Loaded tool: {tool_name}") except Exception as e: @@ -370,6 +395,127 @@ class AgentBridge: if memory_manager: agent.memory_manager = memory_manager logger.info(f"[AgentBridge] Memory manager attached to agent") + + # Store as default agent + self.default_agent = agent + + def _init_agent_for_session(self, session_id: str): + """ + Initialize agent for a specific session + Reuses the same configuration as default agent + """ + from config import conf + import os + + # Get workspace from config + workspace_root = os.path.expanduser(conf().get("agent_workspace", "~/cow")) + + # Initialize workspace + from agent.prompt import ensure_workspace, load_context_files, PromptBuilder + + workspace_files = ensure_workspace(workspace_root, create_templates=True) + + # Setup memory system + memory_manager = None + memory_tools = [] + + try: + from agent.memory import MemoryManager, MemoryConfig + from agent.tools import MemorySearchTool, MemoryGetTool + + memory_config = MemoryConfig( + workspace_root=workspace_root, + embedding_provider="local", + embedding_model="all-MiniLM-L6-v2" + ) + + memory_manager = MemoryManager(memory_config) + memory_tools = [ + MemorySearchTool(memory_manager), + MemoryGetTool(memory_manager) + ] + + except Exception as e: + logger.debug(f"[AgentBridge] Memory system not available for session {session_id}: {e}") + + # Load tools + from agent.tools import ToolManager + tool_manager = ToolManager() + tool_manager.load_tools() + + tools = [] + file_config = { + "cwd": workspace_root, + "memory_manager": memory_manager + } if memory_manager else {"cwd": workspace_root} + + for tool_name in tool_manager.tool_classes.keys(): + try: + tool = tool_manager.create_tool(tool_name) + if tool: + if tool_name in ['read', 'write', 'edit', 'bash', 'grep', 'find', 'ls']: + tool.config = file_config + tool.cwd = file_config.get("cwd", tool.cwd if hasattr(tool, 'cwd') else None) + if 'memory_manager' in file_config: + tool.memory_manager = file_config['memory_manager'] + elif tool_name == 'bocha_search': + bocha_api_key = conf().get("bocha_api_key", "") + if bocha_api_key: + tool.config = {"bocha_api_key": bocha_api_key} + tool.api_key = bocha_api_key + tools.append(tool) + except Exception as e: + logger.warning(f"[AgentBridge] Failed to load tool {tool_name} for session {session_id}: {e}") + + if memory_tools: + tools.extend(memory_tools) + + # Load context files + context_files = load_context_files(workspace_root) + + # Check if this is the first conversation + from agent.prompt.workspace import is_first_conversation, mark_conversation_started + is_first = is_first_conversation(workspace_root) + + # Build system prompt + prompt_builder = PromptBuilder( + workspace_dir=workspace_root, + language="zh" + ) + + runtime_info = { + "model": conf().get("model", "unknown"), + "workspace": workspace_root, + "channel": conf().get("channel_type", "unknown") + } + + system_prompt = prompt_builder.build( + tools=tools, + context_files=context_files, + memory_manager=memory_manager, + runtime_info=runtime_info, + is_first_conversation=is_first + ) + + if is_first: + mark_conversation_started(workspace_root) + + # Create agent for this session + agent = self.create_agent( + system_prompt=system_prompt, + tools=tools, + max_steps=50, + output_mode="logger", + workspace_dir=workspace_root, + enable_skills=True + ) + + if memory_manager: + agent.memory_manager = memory_manager + + # Store agent for this session + self.agents[session_id] = agent + logger.info(f"[AgentBridge] Agent created for session: {session_id}") def agent_reply(self, query: str, context: Context = None, on_event=None, clear_history: bool = False) -> Reply: @@ -378,7 +524,7 @@ class AgentBridge: Args: query: User query - context: COW context (optional) + context: COW context (optional, contains session_id for user isolation) on_event: Event callback (optional) clear_history: Whether to clear conversation history @@ -386,8 +532,13 @@ class AgentBridge: Reply object """ try: - # Get agent (will auto-initialize if needed) - agent = self.get_agent() + # Extract session_id from context for user isolation + session_id = None + if context: + session_id = context.kwargs.get("session_id") or context.get("session_id") + + # Get agent for this session (will auto-initialize if needed) + agent = self.get_agent(session_id=session_id) if not agent: return Reply(ReplyType.ERROR, "Failed to initialize super agent") @@ -402,4 +553,21 @@ class AgentBridge: except Exception as e: logger.error(f"Agent reply error: {e}") - return Reply(ReplyType.ERROR, f"Agent error: {str(e)}") \ No newline at end of file + return Reply(ReplyType.ERROR, f"Agent error: {str(e)}") + + def clear_session(self, session_id: str): + """ + Clear a specific session's agent and conversation history + + Args: + session_id: Session identifier to clear + """ + if session_id in self.agents: + logger.info(f"[AgentBridge] Clearing session: {session_id}") + del self.agents[session_id] + + def clear_all_sessions(self): + """Clear all agent sessions""" + logger.info(f"[AgentBridge] Clearing all sessions ({len(self.agents)} total)") + self.agents.clear() + self.default_agent = None \ No newline at end of file diff --git a/channel/web/web_channel.py b/channel/web/web_channel.py index 5f00f1b..1486c1d 100644 --- a/channel/web/web_channel.py +++ b/channel/web/web_channel.py @@ -49,8 +49,6 @@ class WebChannel(ChatChannel): self.msg_id_counter = 0 # 添加消息ID计数器 self.session_queues = {} # 存储session_id到队列的映射 self.request_to_session = {} # 存储request_id到session_id的映射 - # web channel无需前缀 - conf()["single_chat_prefix"] = [""] def _generate_msg_id(self): @@ -122,18 +120,30 @@ class WebChannel(ChatChannel): if session_id not in self.session_queues: self.session_queues[session_id] = Queue() + # Web channel 不需要前缀,确保消息能通过前缀检查 + trigger_prefixs = conf().get("single_chat_prefix", [""]) + if check_prefix(prompt, trigger_prefixs) is None: + # 如果没有匹配到前缀,给消息加上第一个前缀 + if trigger_prefixs: + prompt = trigger_prefixs[0] + prompt + logger.debug(f"[WebChannel] Added prefix to message: {prompt}") + # 创建消息对象 msg = WebMessage(self._generate_msg_id(), prompt) msg.from_user_id = session_id # 使用会话ID作为用户ID - # 创建上下文 - context = self._compose_context(ContextType.TEXT, prompt, msg=msg) + # 创建上下文,明确指定 isgroup=False + context = self._compose_context(ContextType.TEXT, prompt, msg=msg, isgroup=False) + + # 检查 context 是否为 None(可能被插件过滤等) + if context is None: + logger.warning(f"[WebChannel] Context is None for session {session_id}, message may be filtered") + return json.dumps({"status": "error", "message": "Message was filtered"}) - # 添加必要的字段 + # 覆盖必要的字段(_compose_context 会设置默认值,但我们需要使用实际的 session_id) context["session_id"] = session_id + context["receiver"] = session_id context["request_id"] = request_id - context["isgroup"] = False # 添加 isgroup 字段 - context["receiver"] = session_id # 添加 receiver 字段 # 异步处理消息 - 只传递上下文 threading.Thread(target=self.produce, args=(context,)).start() diff --git a/config.py b/config.py index 1a2a105..0e8625a 100644 --- a/config.py +++ b/config.py @@ -185,7 +185,8 @@ available_setting = { "Minimax_base_url": "", "web_port": 9899, "agent": False, # 是否开启Agent模式 - "agent_workspace": "~/cow" # agent工作空间路径,用于存储skills、memory等 + "agent_workspace": "~/cow", # agent工作空间路径,用于存储skills、memory等 + "bocha_api_key": "" } diff --git a/skills/skill-creator/SKILL.md b/skills/skill-creator/SKILL.md index 48d335d..3729055 100644 --- a/skills/skill-creator/SKILL.md +++ b/skills/skill-creator/SKILL.md @@ -68,10 +68,11 @@ skill-name/ - Must test scripts before including **references/** - When to include: -- Documentation for agent to reference -- Database schemas, API docs, domain knowledge +- **ONLY** when documentation is too large for SKILL.md (>500 lines) +- Database schemas, complex API specs that agent needs to reference - Agent reads these files into context as needed -- For large files (>10k words), include grep patterns in SKILL.md +- **NOT for**: API reference docs, usage examples, tutorials (put in SKILL.md instead) +- **Rule of thumb**: If it fits in SKILL.md, don't create a separate reference file **assets/** - When to include: - Files used in output (not loaded to context) @@ -82,11 +83,15 @@ skill-name/ ### What NOT to Include -Do NOT create auxiliary documentation: -- README.md -- INSTALLATION_GUIDE.md -- CHANGELOG.md -- Other non-essential files +Do NOT create auxiliary documentation files: +- README.md - Instructions belong in SKILL.md +- INSTALLATION_GUIDE.md - Setup info belongs in SKILL.md +- CHANGELOG.md - Not needed for local skills +- API_REFERENCE.md - Put API docs directly in SKILL.md +- USAGE_EXAMPLES.md - Put examples directly in SKILL.md +- Any other documentation files - Everything goes in SKILL.md unless it's too large + +**Critical Rule**: Only create files that the agent will actually execute (scripts) or that are too large for SKILL.md (references). Documentation, examples, and guides ALL belong in SKILL.md. ## Skill Creation Process @@ -133,22 +138,31 @@ To turn concrete examples into an effective skill, analyze each example by: 1. Considering how to execute on the example from scratch 2. Identifying what scripts, references, and assets would be helpful when executing these workflows repeatedly +**Planning Checklist**: +- ✅ **Always needed**: SKILL.md with clear description and usage instructions +- ✅ **scripts/**: Only if code needs to be executed (not just shown as examples) +- ❌ **references/**: Rarely needed - only if documentation is >500 lines and can't fit in SKILL.md +- ✅ **assets/**: Only if files are used in output (templates, boilerplate, etc.) + Example: When building a `pdf-editor` skill to handle queries like "Help me rotate this PDF," the analysis shows: 1. Rotating a PDF requires re-writing the same code each time 2. A `scripts/rotate_pdf.py` script would be helpful to store in the skill +3. ❌ Don't create `references/api-docs.md` - put API info in SKILL.md instead Example: When designing a `frontend-webapp-builder` skill for queries like "Build me a todo app" or "Build me a dashboard to track my steps," the analysis shows: 1. Writing a frontend webapp requires the same boilerplate HTML/React each time 2. An `assets/hello-world/` template containing the boilerplate HTML/React project files would be helpful to store in the skill +3. ❌ Don't create `references/usage-examples.md` - put examples in SKILL.md instead Example: When building a `big-query` skill to handle queries like "How many users have logged in today?" the analysis shows: 1. Querying BigQuery requires re-discovering the table schemas and relationships each time -2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill +2. A `references/schema.md` file documenting the table schemas would be helpful to store in the skill (ONLY because schemas are very large) +3. ❌ Don't create separate `references/query-examples.md` - put examples in SKILL.md instead -To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets. +To establish the skill's contents, analyze each concrete example to create a list of the reusable resources to include: scripts, references, and assets. **Default to putting everything in SKILL.md unless there's a compelling reason to separate it.** ### Step 3: Initialize the Skill @@ -200,6 +214,12 @@ These files contain established best practices for effective skill design. To begin implementation, start with the reusable resources identified above: `scripts/`, `references/`, and `assets/` files. Note that this step may require user input. For example, when implementing a `brand-guidelines` skill, the user may need to provide brand assets or templates to store in `assets/`, or documentation to store in `references/`. +**Important Guidelines**: +- **scripts/**: Only create scripts that will be executed. Test all scripts before including. +- **references/**: ONLY create if documentation is too large for SKILL.md (>500 lines). Most skills don't need this. +- **assets/**: Only include files used in output (templates, icons, etc.) +- **Default approach**: Put everything in SKILL.md unless there's a specific reason not to. + Added scripts must be tested by actually running them to ensure there are no bugs and that the output matches what is expected. If there are many similar scripts, only a representative sample needs to be tested to ensure confidence that they all work while balancing time to completion. If you used `--examples`, delete any placeholder files that are not needed for the skill. Only create resource directories that are actually required. From 645c8899b1b6b47286729455e15d2a665b2360d3 Mon Sep 17 00:00:00 2001 From: zhayujie Date: Sun, 1 Feb 2026 12:38:00 +0800 Subject: [PATCH 10/10] fix: remove tool --- agent/tools/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/agent/tools/__init__.py b/agent/tools/__init__.py index a749633..a8863ce 100644 --- a/agent/tools/__init__.py +++ b/agent/tools/__init__.py @@ -21,7 +21,6 @@ from agent.tools.memory.memory_get import MemoryGetTool # Import web tools from agent.tools.web_fetch.web_fetch import WebFetch -from agent.tools.bocha_search.bocha_search import BochaSearch # Import tools with optional dependencies def _import_optional_tools(): @@ -94,7 +93,6 @@ __all__ = [ 'MemorySearchTool', 'MemoryGetTool', 'WebFetch', - 'BochaSearch', # Optional tools (may be None if dependencies not available) 'GoogleSearch', 'FileSave',