From 5a466d0ff678003a7c21325917aa52cd27536509 Mon Sep 17 00:00:00 2001 From: saboteur7 Date: Fri, 30 Jan 2026 11:31:13 +0800 Subject: [PATCH] fix: long-term memory bug --- agent/memory/config.py | 2 +- agent/memory/manager.py | 8 +- agent/memory/storage.py | 200 ++++++++++++++++++++++++---- agent/memory/tools/__init__.py | 10 -- agent/memory/tools/memory_get.py | 118 ---------------- agent/memory/tools/memory_search.py | 106 --------------- agent/tools/edit/edit.py | 9 ++ agent/tools/memory/memory_get.py | 7 +- agent/tools/memory/memory_search.py | 6 +- agent/tools/write/write.py | 5 + memory/2026-01-29.md | 5 - memory/MEMORY.md | 21 --- 12 files changed, 202 insertions(+), 295 deletions(-) delete mode 100644 agent/memory/tools/__init__.py delete mode 100644 agent/memory/tools/memory_get.py delete mode 100644 agent/memory/tools/memory_search.py delete mode 100644 memory/2026-01-29.md delete mode 100644 memory/MEMORY.md diff --git a/agent/memory/config.py b/agent/memory/config.py index 366c134..758611d 100644 --- a/agent/memory/config.py +++ b/agent/memory/config.py @@ -28,7 +28,7 @@ class MemoryConfig: # Search config max_results: int = 10 - min_score: float = 0.3 + min_score: float = 0.1 # Hybrid search weights vector_weight: float = 0.7 diff --git a/agent/memory/manager.py b/agent/memory/manager.py index 58a135d..c5c2719 100644 --- a/agent/memory/manager.py +++ b/agent/memory/manager.py @@ -213,7 +213,6 @@ class MemoryManager: memory_chunks.append(MemoryChunk( id=chunk_id, - agent_id="default", user_id=user_id, scope=scope, source=source, @@ -330,7 +329,6 @@ class MemoryManager: memory_chunks.append(MemoryChunk( id=chunk_id, - agent_id="default", user_id=user_id, scope=scope, source=source, @@ -428,7 +426,7 @@ class MemoryManager: return success - def build_memory_guidance(self, lang: str = "en", include_context: bool = True) -> str: + def build_memory_guidance(self, lang: str = "zh", include_context: bool = True) -> str: """ Build natural memory guidance for agent system prompt @@ -450,7 +448,7 @@ class MemoryManager: if lang == "zh": guidance = f"""## 记忆召回 -回答关于过去工作、决策、日期、人物、偏好或待办事项的问题前:先用 memory_search 搜索 MEMORY.md + memory/*.md;然后用 memory_get 只读取需要的行。如果搜索后仍不确定,说明你已检查过。 +下方"背景知识"包含你的核心长期记忆,可直接使用。如果背景知识中没有相关信息,再用 memory_search 搜索历史记录(memory/*.md 日期文件)。 ## 记忆存储 当用户分享持久偏好、决策或重要事实时(无论是否明确要求"记住"),主动存储: @@ -465,7 +463,7 @@ class MemoryManager: - 自然使用记忆,就像你本来就知道这些信息""" else: guidance = f"""## Memory Recall -Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull only the needed lines. If low confidence after search, say you checked. +"Background Knowledge" below contains your core long-term memories - use them directly. If information is not in Background Knowledge, use memory_search to search, then use memory_get to read files (path format: memory/MEMORY.md, memory/2026-01-30.md). ## Memory Storage When user shares durable preferences, decisions, or important facts (whether or not they explicitly say "remember"), proactively store: diff --git a/agent/memory/storage.py b/agent/memory/storage.py index 1b09615..b8fccf0 100644 --- a/agent/memory/storage.py +++ b/agent/memory/storage.py @@ -50,11 +50,45 @@ class MemoryStorage: def _init_db(self): """Initialize database with schema""" - self.conn = sqlite3.connect(str(self.db_path)) - self.conn.row_factory = sqlite3.Row - - # Enable JSON support - self.conn.execute("PRAGMA journal_mode=WAL") + try: + self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False) + self.conn.row_factory = sqlite3.Row + + # Check database integrity + try: + result = self.conn.execute("PRAGMA integrity_check").fetchone() + if result[0] != 'ok': + print(f"⚠️ Database integrity check failed: {result[0]}") + print(f" Recreating database...") + self.conn.close() + self.conn = None + # Remove corrupted database + self.db_path.unlink(missing_ok=True) + # Remove WAL files + Path(str(self.db_path) + '-wal').unlink(missing_ok=True) + Path(str(self.db_path) + '-shm').unlink(missing_ok=True) + # Reconnect to create new database + self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False) + self.conn.row_factory = sqlite3.Row + except sqlite3.DatabaseError: + # Database is corrupted, recreate it + print(f"⚠️ Database is corrupted, recreating...") + if self.conn: + self.conn.close() + self.conn = None + self.db_path.unlink(missing_ok=True) + Path(str(self.db_path) + '-wal').unlink(missing_ok=True) + Path(str(self.db_path) + '-shm').unlink(missing_ok=True) + self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False) + self.conn.row_factory = sqlite3.Row + + # Enable WAL mode for better concurrency + self.conn.execute("PRAGMA journal_mode=WAL") + # Set busy timeout to avoid "database is locked" errors + self.conn.execute("PRAGMA busy_timeout=5000") + except Exception as e: + print(f"⚠️ Unexpected error during database initialization: {e}") + raise # Create chunks table with embeddings self.conn.execute(""" @@ -92,6 +126,8 @@ class MemoryStorage: """) # Create FTS5 virtual table for keyword search + # Use default unicode61 tokenizer (stable and compatible) + # For CJK support, we'll use LIKE queries as fallback self.conn.execute(""" CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5( text, @@ -261,13 +297,37 @@ class MemoryStorage: scopes: List[str] = None, limit: int = 10 ) -> List[SearchResult]: - """Keyword search using FTS5""" + """ + Keyword search using FTS5 + LIKE fallback + + Strategy: + 1. Try FTS5 search first (good for English and word-based languages) + 2. If no results and query contains CJK characters, use LIKE search + """ if scopes is None: scopes = ["shared"] if user_id: scopes.append("user") - # Build FTS query + # Try FTS5 search first + fts_results = self._search_fts5(query, user_id, scopes, limit) + if fts_results: + return fts_results + + # Fallback to LIKE search for CJK characters + if MemoryStorage._contains_cjk(query): + return self._search_like(query, user_id, scopes, limit) + + return [] + + def _search_fts5( + self, + query: str, + user_id: Optional[str], + scopes: List[str], + limit: int + ) -> List[SearchResult]: + """FTS5 full-text search""" fts_query = self._build_fts_query(query) if not fts_query: return [] @@ -299,20 +359,83 @@ class MemoryStorage: """ params.append(limit) - rows = self.conn.execute(sql_query, params).fetchall() + try: + rows = self.conn.execute(sql_query, params).fetchall() + return [ + SearchResult( + path=row['path'], + start_line=row['start_line'], + end_line=row['end_line'], + score=self._bm25_rank_to_score(row['rank']), + snippet=self._truncate_text(row['text'], 500), + source=row['source'], + user_id=row['user_id'] + ) + for row in rows + ] + except Exception: + return [] + + def _search_like( + self, + query: str, + user_id: Optional[str], + scopes: List[str], + limit: int + ) -> List[SearchResult]: + """LIKE-based search for CJK characters""" + import re + # Extract CJK words (2+ characters) + cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query) + if not cjk_words: + return [] - return [ - SearchResult( - path=row['path'], - start_line=row['start_line'], - end_line=row['end_line'], - score=self._bm25_rank_to_score(row['rank']), - snippet=self._truncate_text(row['text'], 500), - source=row['source'], - user_id=row['user_id'] - ) - for row in rows - ] + scope_placeholders = ','.join('?' * len(scopes)) + + # Build LIKE conditions for each word + like_conditions = [] + params = [] + for word in cjk_words: + like_conditions.append("text LIKE ?") + params.append(f'%{word}%') + + where_clause = ' OR '.join(like_conditions) + params.extend(scopes) + + if user_id: + sql_query = f""" + SELECT * FROM chunks + WHERE ({where_clause}) + AND scope IN ({scope_placeholders}) + AND (scope = 'shared' OR user_id = ?) + LIMIT ? + """ + params.extend([user_id, limit]) + else: + sql_query = f""" + SELECT * FROM chunks + WHERE ({where_clause}) + AND scope IN ({scope_placeholders}) + LIMIT ? + """ + params.append(limit) + + try: + rows = self.conn.execute(sql_query, params).fetchall() + return [ + SearchResult( + path=row['path'], + start_line=row['start_line'], + end_line=row['end_line'], + score=0.5, # Fixed score for LIKE search + snippet=self._truncate_text(row['text'], 500), + source=row['source'], + user_id=row['user_id'] + ) + for row in rows + ] + except Exception: + return [] def delete_by_path(self, path: str): """Delete all chunks from a file""" @@ -354,7 +477,19 @@ class MemoryStorage: def close(self): """Close database connection""" if self.conn: - self.conn.close() + try: + self.conn.commit() # Ensure all changes are committed + self.conn.close() + self.conn = None # Mark as closed + except Exception as e: + print(f"⚠️ Error closing database connection: {e}") + + def __del__(self): + """Destructor to ensure connection is closed""" + try: + self.close() + except: + pass # Ignore errors during cleanup # Helper methods @@ -390,14 +525,29 @@ class MemoryStorage: return dot_product / (norm1 * norm2) @staticmethod - def _build_fts_query(raw_query: str) -> Optional[str]: - """Build FTS5 query from raw text""" + def _contains_cjk(text: str) -> bool: + """Check if text contains CJK (Chinese/Japanese/Korean) characters""" import re - tokens = re.findall(r'[A-Za-z0-9_\u4e00-\u9fff]+', raw_query) + return bool(re.search(r'[\u4e00-\u9fff]', text)) + + @staticmethod + def _build_fts_query(raw_query: str) -> Optional[str]: + """ + Build FTS5 query from raw text + + Works best for English and word-based languages. + For CJK characters, LIKE search will be used as fallback. + """ + import re + # Extract words (primarily English words and numbers) + tokens = re.findall(r'[A-Za-z0-9_]+', raw_query) if not tokens: return None + + # Quote tokens for exact matching quoted = [f'"{t}"' for t in tokens] - return ' AND '.join(quoted) + # Use OR for more flexible matching + return ' OR '.join(quoted) @staticmethod def _bm25_rank_to_score(rank: float) -> float: diff --git a/agent/memory/tools/__init__.py b/agent/memory/tools/__init__.py deleted file mode 100644 index 2f7a5d0..0000000 --- a/agent/memory/tools/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -Memory tools for AgentMesh - -Provides memory_search and memory_get tools for agents -""" - -from agent.memory.tools.memory_search import MemorySearchTool -from agent.memory.tools.memory_get import MemoryGetTool - -__all__ = ['MemorySearchTool', 'MemoryGetTool'] diff --git a/agent/memory/tools/memory_get.py b/agent/memory/tools/memory_get.py deleted file mode 100644 index e9af36c..0000000 --- a/agent/memory/tools/memory_get.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -Memory get tool - -Allows agents to read specific sections from memory files -""" - -from typing import Dict, Any, Optional -from pathlib import Path -from agent.tools.base_tool import BaseTool -from agent.memory.manager import MemoryManager - - -class MemoryGetTool(BaseTool): - """Tool for reading memory file contents""" - - def __init__(self, memory_manager: MemoryManager): - """ - Initialize memory get tool - - Args: - memory_manager: MemoryManager instance - """ - super().__init__() - self.memory_manager = memory_manager - self._name = "memory_get" - self._description = ( - "Read specific memory file content by path and line range. " - "Use after memory_search to get full context from historical memory files." - ) - - @property - def name(self) -> str: - return self._name - - @property - def description(self) -> str: - return self._description - - @property - def parameters(self) -> Dict[str, Any]: - return { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Relative path to the memory file (e.g., 'MEMORY.md', 'memory/2024-01-29.md')" - }, - "start_line": { - "type": "integer", - "description": "Starting line number (optional, default: 1)", - "default": 1 - }, - "num_lines": { - "type": "integer", - "description": "Number of lines to read (optional, reads all if not specified)" - } - }, - "required": ["path"] - } - - async def execute(self, **kwargs) -> str: - """ - Execute memory file read - - Args: - path: File path - start_line: Start line - num_lines: Number of lines - - Returns: - File content - """ - path = kwargs.get("path") - start_line = kwargs.get("start_line", 1) - num_lines = kwargs.get("num_lines") - - if not path: - return "Error: path parameter is required" - - try: - workspace_dir = self.memory_manager.config.get_workspace() - file_path = workspace_dir / path - - if not file_path.exists(): - return f"Error: File not found: {path}" - - content = file_path.read_text() - lines = content.split('\n') - - # Handle line range - if start_line < 1: - start_line = 1 - - start_idx = start_line - 1 - - if num_lines: - end_idx = start_idx + num_lines - selected_lines = lines[start_idx:end_idx] - else: - selected_lines = lines[start_idx:] - - result = '\n'.join(selected_lines) - - # Add metadata - total_lines = len(lines) - shown_lines = len(selected_lines) - - output = [ - f"File: {path}", - f"Lines: {start_line}-{start_line + shown_lines - 1} (total: {total_lines})", - "", - result - ] - - return '\n'.join(output) - - except Exception as e: - return f"Error reading memory file: {str(e)}" diff --git a/agent/memory/tools/memory_search.py b/agent/memory/tools/memory_search.py deleted file mode 100644 index 1cfda07..0000000 --- a/agent/memory/tools/memory_search.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Memory search tool - -Allows agents to search their memory using semantic and keyword search -""" - -from typing import Dict, Any, Optional -from agent.tools.base_tool import BaseTool -from agent.memory.manager import MemoryManager - - -class MemorySearchTool(BaseTool): - """Tool for searching agent memory""" - - def __init__(self, memory_manager: MemoryManager, user_id: Optional[str] = None): - """ - Initialize memory search tool - - Args: - memory_manager: MemoryManager instance - user_id: Optional user ID for scoped search - """ - super().__init__() - self.memory_manager = memory_manager - self.user_id = user_id - self._name = "memory_search" - self._description = ( - "Search historical memory files (beyond today/yesterday) using semantic and keyword search. " - "Recent context (MEMORY.md + today + yesterday) is already loaded. " - "Use this ONLY for older dates, specific past events, or when current context lacks needed info." - ) - - @property - def name(self) -> str: - return self._name - - @property - def description(self) -> str: - return self._description - - @property - def parameters(self) -> Dict[str, Any]: - return { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search query (can be natural language question or keywords)" - }, - "max_results": { - "type": "integer", - "description": "Maximum number of results to return (default: 10)", - "default": 10 - }, - "min_score": { - "type": "number", - "description": "Minimum relevance score (0-1, default: 0.3)", - "default": 0.3 - } - }, - "required": ["query"] - } - - async def execute(self, **kwargs) -> str: - """ - Execute memory search - - Args: - query: Search query - max_results: Maximum results - min_score: Minimum score - - Returns: - Formatted search results - """ - query = kwargs.get("query") - max_results = kwargs.get("max_results", 10) - min_score = kwargs.get("min_score", 0.3) - - if not query: - return "Error: query parameter is required" - - try: - results = await self.memory_manager.search( - query=query, - user_id=self.user_id, - max_results=max_results, - min_score=min_score, - include_shared=True - ) - - if not results: - return f"No relevant memories found for query: {query}" - - # Format results - output = [f"Found {len(results)} relevant memories:\n"] - - for i, result in enumerate(results, 1): - output.append(f"\n{i}. {result.path} (lines {result.start_line}-{result.end_line})") - output.append(f" Score: {result.score:.3f}") - output.append(f" Snippet: {result.snippet}") - - return "\n".join(output) - - except Exception as e: - return f"Error searching memory: {str(e)}" diff --git a/agent/tools/edit/edit.py b/agent/tools/edit/edit.py index 54f7529..566309b 100644 --- a/agent/tools/edit/edit.py +++ b/agent/tools/edit/edit.py @@ -46,6 +46,7 @@ class Edit(BaseTool): def __init__(self, config: dict = None): self.config = config or {} self.cwd = self.config.get("cwd", os.getcwd()) + self.memory_manager = self.config.get("memory_manager", None) def execute(self, args: Dict[str, Any]) -> ToolResult: """ @@ -141,6 +142,14 @@ class Edit(BaseTool): "first_changed_line": diff_result['first_changed_line'] } + # Notify memory manager if file is in memory directory + if self.memory_manager and "memory/" in path: + try: + self.memory_manager.mark_dirty() + except Exception as e: + # Don't fail the edit if memory notification fails + pass + return ToolResult.success(result) except UnicodeDecodeError: diff --git a/agent/tools/memory/memory_get.py b/agent/tools/memory/memory_get.py index 0ad1cbd..8abb207 100644 --- a/agent/tools/memory/memory_get.py +++ b/agent/tools/memory/memory_get.py @@ -22,7 +22,7 @@ class MemoryGetTool(BaseTool): "properties": { "path": { "type": "string", - "description": "Relative path to the memory file (e.g., 'MEMORY.md', 'memory/2024-01-29.md')" + "description": "Relative path to the memory file (e.g., 'memory/MEMORY.md', 'memory/2024-01-29.md')" }, "start_line": { "type": "integer", @@ -68,6 +68,11 @@ class MemoryGetTool(BaseTool): try: workspace_dir = self.memory_manager.config.get_workspace() + + # Auto-prepend memory/ if not present and not absolute path + if not path.startswith('memory/') and not path.startswith('/'): + path = f'memory/{path}' + file_path = workspace_dir / path if not file_path.exists(): diff --git a/agent/tools/memory/memory_search.py b/agent/tools/memory/memory_search.py index e854d02..6479856 100644 --- a/agent/tools/memory/memory_search.py +++ b/agent/tools/memory/memory_search.py @@ -30,8 +30,8 @@ class MemorySearchTool(BaseTool): }, "min_score": { "type": "number", - "description": "Minimum relevance score (0-1, default: 0.3)", - "default": 0.3 + "description": "Minimum relevance score (0-1, default: 0.1)", + "default": 0.1 } }, "required": ["query"] @@ -64,7 +64,7 @@ class MemorySearchTool(BaseTool): query = args.get("query") max_results = args.get("max_results", 10) - min_score = args.get("min_score", 0.3) + min_score = args.get("min_score", 0.1) if not query: return ToolResult.fail("Error: query parameter is required") diff --git a/agent/tools/write/write.py b/agent/tools/write/write.py index a246040..9836564 100644 --- a/agent/tools/write/write.py +++ b/agent/tools/write/write.py @@ -34,6 +34,7 @@ class Write(BaseTool): def __init__(self, config: dict = None): self.config = config or {} self.cwd = self.config.get("cwd", os.getcwd()) + self.memory_manager = self.config.get("memory_manager", None) def execute(self, args: Dict[str, Any]) -> ToolResult: """ @@ -64,6 +65,10 @@ class Write(BaseTool): # Get bytes written bytes_written = len(content.encode('utf-8')) + # Auto-sync to memory database if this is a memory file + if self.memory_manager and 'memory/' in path: + self.memory_manager.mark_dirty() + result = { "message": f"Successfully wrote {bytes_written} bytes to {path}", "path": path, diff --git a/memory/2026-01-29.md b/memory/2026-01-29.md deleted file mode 100644 index 44c18e3..0000000 --- a/memory/2026-01-29.md +++ /dev/null @@ -1,5 +0,0 @@ -# 2026-01-29 记录 - -## 老王的重要决定 -- 今天老王告诉我他决定要学AI了,这是一个重要的决策 -- 这可能会是他学习和职业发展的一个转折点 \ No newline at end of file diff --git a/memory/MEMORY.md b/memory/MEMORY.md deleted file mode 100644 index d80e5cd..0000000 --- a/memory/MEMORY.md +++ /dev/null @@ -1,21 +0,0 @@ -# Memory - -Long-term curated memories and preferences. - -## 用户信息 -- 用户名:老王 - -## 用户信息 -- 用户名:老王 - -## 用户偏好 -- 喜欢吃红烧肉 -- 爱打篮球 - -## 重要决策 -- 决定要学习AI(2026-01-29) - -## Notes - -- Important decisions and facts go here -- This is your long-term knowledge base \ No newline at end of file