From 5a466d0ff678003a7c21325917aa52cd27536509 Mon Sep 17 00:00:00 2001
From: saboteur7 <saboteur7@163.com>
Date: Fri, 30 Jan 2026 11:31:13 +0800
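Subject: [PATCH] fix: long-term memory bug

Fix memory recall for CJK queries and keep the index in sync:

- storage: check database integrity on open and recreate it if
  corrupted; set busy_timeout; open with check_same_thread=False
- storage: build the FTS5 query from ASCII tokens joined with OR and
  fall back to a LIKE search when the query contains CJK characters
- config/tools: lower the default min_score from 0.3 to 0.1
- edit/write tools: call memory_manager.mark_dirty() after changing a
  path that contains "memory/"
- memory_get: prepend "memory/" to relative paths that lack it
- remove agent/memory/tools (duplicated by agent/tools/memory) and the
  committed memory/MEMORY.md and memory/2026-01-29.md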

---
 agent/memory/config.py              |   2 +-
 agent/memory/manager.py             |   8 +-
 agent/memory/storage.py             | 200 ++++++++++++++++++++++++----
 agent/memory/tools/__init__.py      |  10 --
 agent/memory/tools/memory_get.py    | 118 ----------------
 agent/memory/tools/memory_search.py | 106 ---------------
 agent/tools/edit/edit.py            |   9 ++
 agent/tools/memory/memory_get.py    |   7 +-
 agent/tools/memory/memory_search.py |   6 +-
 agent/tools/write/write.py          |   5 +
 memory/2026-01-29.md                |   5 -
 memory/MEMORY.md                    |  21 ---
 12 files changed, 202 insertions(+), 295 deletions(-)
 delete mode 100644 agent/memory/tools/__init__.py
 delete mode 100644 agent/memory/tools/memory_get.py
 delete mode 100644 agent/memory/tools/memory_search.py
 delete mode 100644 memory/2026-01-29.md
 delete mode 100644 memory/MEMORY.md

diff --git a/agent/memory/config.py b/agent/memory/config.py
index 366c134..758611d 100644
--- a/agent/memory/config.py
+++ b/agent/memory/config.py
@@ -28,7 +28,7 @@ class MemoryConfig:
     
     # Search config
     max_results: int = 10
-    min_score: float = 0.3
+    min_score: float = 0.1
     
     # Hybrid search weights
     vector_weight: float = 0.7
diff --git a/agent/memory/manager.py b/agent/memory/manager.py
index 58a135d..c5c2719 100644
--- a/agent/memory/manager.py
+++ b/agent/memory/manager.py
@@ -213,7 +213,6 @@ class MemoryManager:
             
             memory_chunks.append(MemoryChunk(
                 id=chunk_id,
-                agent_id="default",
                 user_id=user_id,
                 scope=scope,
                 source=source,
@@ -330,7 +329,6 @@ class MemoryManager:
             
             memory_chunks.append(MemoryChunk(
                 id=chunk_id,
-                agent_id="default",
                 user_id=user_id,
                 scope=scope,
                 source=source,
@@ -428,7 +426,7 @@ class MemoryManager:
         
         return success
     
-    def build_memory_guidance(self, lang: str = "en", include_context: bool = True) -> str:
+    def build_memory_guidance(self, lang: str = "zh", include_context: bool = True) -> str:
         """
         Build natural memory guidance for agent system prompt
         
@@ -450,7 +448,7 @@ class MemoryManager:
         
         if lang == "zh":
             guidance = f"""## 记忆召回
-回答关于过去工作、决策、日期、人物、偏好或待办事项的问题前：先用 memory_search 搜索 MEMORY.md + memory/*.md；然后用 memory_get 只读取需要的行。如果搜索后仍不确定，说明你已检查过。
+下方"背景知识"包含你的核心长期记忆，可直接使用。如果背景知识中没有相关信息，再用 memory_search 搜索历史记录（memory/*.md 日期文件）。
 
 ## 记忆存储
 当用户分享持久偏好、决策或重要事实时（无论是否明确要求"记住"），主动存储：
@@ -465,7 +463,7 @@ class MemoryManager:
 - 自然使用记忆，就像你本来就知道这些信息"""
         else:
             guidance = f"""## Memory Recall
-Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.
+"Background Knowledge" below contains your core long-term memories - use them directly. If information is not in Background Knowledge, use memory_search to search, then use memory_get to read files (path format: memory/MEMORY.md, memory/2026-01-30.md).
 
 ## Memory Storage
 When user shares durable preferences, decisions, or important facts (whether or not they explicitly say "remember"), proactively store:
diff --git a/agent/memory/storage.py b/agent/memory/storage.py
index 1b09615..b8fccf0 100644
--- a/agent/memory/storage.py
+++ b/agent/memory/storage.py
@@ -50,11 +50,45 @@ class MemoryStorage:
     
     def _init_db(self):
         """Initialize database with schema"""
-        self.conn = sqlite3.connect(str(self.db_path))
-        self.conn.row_factory = sqlite3.Row
-        
-        # Enable JSON support
-        self.conn.execute("PRAGMA journal_mode=WAL")
+        try:
+            self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
+            self.conn.row_factory = sqlite3.Row
+            
+            # Check database integrity
+            try:
+                result = self.conn.execute("PRAGMA integrity_check").fetchone()
+                if result[0] != 'ok':
+                    print(f"⚠️  Database integrity check failed: {result[0]}")
+                    print(f"   Recreating database...")
+                    self.conn.close()
+                    self.conn = None
+                    # Remove corrupted database
+                    self.db_path.unlink(missing_ok=True)
+                    # Remove WAL files
+                    Path(str(self.db_path) + '-wal').unlink(missing_ok=True)
+                    Path(str(self.db_path) + '-shm').unlink(missing_ok=True)
+                    # Reconnect to create new database
+                    self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
+                    self.conn.row_factory = sqlite3.Row
+            except sqlite3.DatabaseError:
+                # Database is corrupted, recreate it
+                print(f"⚠️  Database is corrupted, recreating...")
+                if self.conn:
+                    self.conn.close()
+                    self.conn = None
+                self.db_path.unlink(missing_ok=True)
+                Path(str(self.db_path) + '-wal').unlink(missing_ok=True)
+                Path(str(self.db_path) + '-shm').unlink(missing_ok=True)
+                self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
+                self.conn.row_factory = sqlite3.Row
+            
+            # Enable WAL mode for better concurrency
+            self.conn.execute("PRAGMA journal_mode=WAL")
+            # Set busy timeout to avoid "database is locked" errors
+            self.conn.execute("PRAGMA busy_timeout=5000")
+        except Exception as e:
+            print(f"⚠️  Unexpected error during database initialization: {e}")
+            raise
         
         # Create chunks table with embeddings
         self.conn.execute("""
@@ -92,6 +126,8 @@ class MemoryStorage:
         """)
         
         # Create FTS5 virtual table for keyword search
+        # Use default unicode61 tokenizer (stable and compatible)
+        # For CJK support, we'll use LIKE queries as fallback
         self.conn.execute("""
             CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
                 text,
@@ -261,13 +297,37 @@ class MemoryStorage:
         scopes: List[str] = None,
         limit: int = 10
     ) -> List[SearchResult]:
-        """Keyword search using FTS5"""
+        """
+        Keyword search using FTS5 + LIKE fallback
+        
+        Strategy:
+        1. Try FTS5 search first (good for English and word-based languages)
+        2. If no results and query contains CJK characters, use LIKE search
+        """
         if scopes is None:
             scopes = ["shared"]
             if user_id:
                 scopes.append("user")
         
-        # Build FTS query
+        # Try FTS5 search first
+        fts_results = self._search_fts5(query, user_id, scopes, limit)
+        if fts_results:
+            return fts_results
+        
+        # Fallback to LIKE search for CJK characters
+        if MemoryStorage._contains_cjk(query):
+            return self._search_like(query, user_id, scopes, limit)
+        
+        return []
+    
+    def _search_fts5(
+        self,
+        query: str,
+        user_id: Optional[str],
+        scopes: List[str],
+        limit: int
+    ) -> List[SearchResult]:
+        """FTS5 full-text search"""
         fts_query = self._build_fts_query(query)
         if not fts_query:
             return []
@@ -299,20 +359,83 @@ class MemoryStorage:
             """
             params.append(limit)
         
-        rows = self.conn.execute(sql_query, params).fetchall()
+        try:
+            rows = self.conn.execute(sql_query, params).fetchall()
+            return [
+                SearchResult(
+                    path=row['path'],
+                    start_line=row['start_line'],
+                    end_line=row['end_line'],
+                    score=self._bm25_rank_to_score(row['rank']),
+                    snippet=self._truncate_text(row['text'], 500),
+                    source=row['source'],
+                    user_id=row['user_id']
+                )
+                for row in rows
+            ]
+        except Exception:
+            return []
+    
+    def _search_like(
+        self,
+        query: str,
+        user_id: Optional[str],
+        scopes: List[str],
+        limit: int
+    ) -> List[SearchResult]:
+        """LIKE substring search on chunk text for CJK runs; returns [] if no run is found or the lookup raises"""
+        import re
+        # Runs of 2+ chars in U+4E00-U+9FFF only; a lone ideograph yields no search terms
+        cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
+        if not cjk_words:
+            return []
         
-        return [
-            SearchResult(
-                path=row['path'],
-                start_line=row['start_line'],
-                end_line=row['end_line'],
-                score=self._bm25_rank_to_score(row['rank']),
-                snippet=self._truncate_text(row['text'], 500),
-                source=row['source'],
-                user_id=row['user_id']
-            )
-            for row in rows
-        ]
+        scope_placeholders = ','.join('?' * len(scopes))
+        
+        # One "text LIKE ?" clause per run, OR-ed below so any run may match
+        like_conditions = []
+        params = []
+        for word in cjk_words:
+            like_conditions.append("text LIKE ?")
+            params.append(f'%{word}%')
+        
+        where_clause = ' OR '.join(like_conditions)
+        params.extend(scopes)
+        
+        if user_id:
+            sql_query = f"""
+                SELECT * FROM chunks
+                WHERE ({where_clause})
+                AND scope IN ({scope_placeholders})
+                AND (scope = 'shared' OR user_id = ?)
+                LIMIT ?
+            """
+            params.extend([user_id, limit])
+        else:
+            sql_query = f"""
+                SELECT * FROM chunks
+                WHERE ({where_clause})
+                AND scope IN ({scope_placeholders})
+                LIMIT ?
+            """
+            params.append(limit)
+        
+        try:
+            rows = self.conn.execute(sql_query, params).fetchall()
+            return [
+                SearchResult(
+                    path=row['path'],
+                    start_line=row['start_line'],
+                    end_line=row['end_line'],
+                    score=0.5,  # LIKE gives no ranking, so all hits share one score
+                    snippet=self._truncate_text(row['text'], 500),
+                    source=row['source'],
+                    user_id=row['user_id']
+                )
+                for row in rows
+            ]
+        except Exception:
+            return []
     
     def delete_by_path(self, path: str):
         """Delete all chunks from a file"""
@@ -354,7 +477,19 @@ class MemoryStorage:
     def close(self):
         """Close database connection"""
         if self.conn:
-            self.conn.close()
+            conn, self.conn = self.conn, None  # Detach first so a failure never leaves a stale handle
+            for step in (conn.commit, conn.close):  # Always attempt close, even if commit fails
+                try:
+                    step()
+                except Exception as e:
+                    print(f"⚠️  Error closing database connection: {e}")
+    
+    def __del__(self):
+        """Best-effort close of the connection when the object is finalized"""
+        try:
+            self.close()
+        except Exception:
+            pass  # Never let cleanup errors escape a finalizer
     
     # Helper methods
     
@@ -390,14 +525,29 @@ class MemoryStorage:
         return dot_product / (norm1 * norm2)
     
     @staticmethod
-    def _build_fts_query(raw_query: str) -> Optional[str]:
-        """Build FTS5 query from raw text"""
+    def _contains_cjk(text: str) -> bool:
+        """Check if text contains CJK Unified Ideographs (U+4E00-U+9FFF); kana and hangul are not matched"""
         import re
-        tokens = re.findall(r'[A-Za-z0-9_\u4e00-\u9fff]+', raw_query)
+        return bool(re.search(r'[\u4e00-\u9fff]', text))
+    
+    @staticmethod
+    def _build_fts_query(raw_query: str) -> Optional[str]:
+        """
+        Build FTS5 query from raw text
+        
+        Keeps only ASCII word tokens ([A-Za-z0-9_]+); returns None if none.
+        Tokens are OR-ed; CJK text is left to the LIKE search fallback.
+        """
+        import re
+        # ASCII letters, digits and underscore only; everything else is dropped
+        tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
         if not tokens:
             return None
+        
+        # Double-quote each token so FTS5 reads it as a string, not query syntax
         quoted = [f'"{t}"' for t in tokens]
-        return ' AND '.join(quoted)
+        # Use OR for more flexible matching
+        return ' OR '.join(quoted)
     
     @staticmethod
     def _bm25_rank_to_score(rank: float) -> float:
diff --git a/agent/memory/tools/__init__.py b/agent/memory/tools/__init__.py
deleted file mode 100644
index 2f7a5d0..0000000
--- a/agent/memory/tools/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-Memory tools for AgentMesh
-
-Provides memory_search and memory_get tools for agents
-"""
-
-from agent.memory.tools.memory_search import MemorySearchTool
-from agent.memory.tools.memory_get import MemoryGetTool
-
-__all__ = ['MemorySearchTool', 'MemoryGetTool']
diff --git a/agent/memory/tools/memory_get.py b/agent/memory/tools/memory_get.py
deleted file mode 100644
index e9af36c..0000000
--- a/agent/memory/tools/memory_get.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""
-Memory get tool
-
-Allows agents to read specific sections from memory files
-"""
-
-from typing import Dict, Any, Optional
-from pathlib import Path
-from agent.tools.base_tool import BaseTool
-from agent.memory.manager import MemoryManager
-
-
-class MemoryGetTool(BaseTool):
-    """Tool for reading memory file contents"""
-    
-    def __init__(self, memory_manager: MemoryManager):
-        """
-        Initialize memory get tool
-        
-        Args:
-            memory_manager: MemoryManager instance
-        """
-        super().__init__()
-        self.memory_manager = memory_manager
-        self._name = "memory_get"
-        self._description = (
-            "Read specific memory file content by path and line range. "
-            "Use after memory_search to get full context from historical memory files."
-        )
-    
-    @property
-    def name(self) -> str:
-        return self._name
-    
-    @property
-    def description(self) -> str:
-        return self._description
-    
-    @property
-    def parameters(self) -> Dict[str, Any]:
-        return {
-            "type": "object",
-            "properties": {
-                "path": {
-                    "type": "string",
-                    "description": "Relative path to the memory file (e.g., 'MEMORY.md', 'memory/2024-01-29.md')"
-                },
-                "start_line": {
-                    "type": "integer",
-                    "description": "Starting line number (optional, default: 1)",
-                    "default": 1
-                },
-                "num_lines": {
-                    "type": "integer",
-                    "description": "Number of lines to read (optional, reads all if not specified)"
-                }
-            },
-            "required": ["path"]
-        }
-    
-    async def execute(self, **kwargs) -> str:
-        """
-        Execute memory file read
-        
-        Args:
-            path: File path
-            start_line: Start line
-            num_lines: Number of lines
-            
-        Returns:
-            File content
-        """
-        path = kwargs.get("path")
-        start_line = kwargs.get("start_line", 1)
-        num_lines = kwargs.get("num_lines")
-        
-        if not path:
-            return "Error: path parameter is required"
-        
-        try:
-            workspace_dir = self.memory_manager.config.get_workspace()
-            file_path = workspace_dir / path
-            
-            if not file_path.exists():
-                return f"Error: File not found: {path}"
-            
-            content = file_path.read_text()
-            lines = content.split('\n')
-            
-            # Handle line range
-            if start_line < 1:
-                start_line = 1
-            
-            start_idx = start_line - 1
-            
-            if num_lines:
-                end_idx = start_idx + num_lines
-                selected_lines = lines[start_idx:end_idx]
-            else:
-                selected_lines = lines[start_idx:]
-            
-            result = '\n'.join(selected_lines)
-            
-            # Add metadata
-            total_lines = len(lines)
-            shown_lines = len(selected_lines)
-            
-            output = [
-                f"File: {path}",
-                f"Lines: {start_line}-{start_line + shown_lines - 1} (total: {total_lines})",
-                "",
-                result
-            ]
-            
-            return '\n'.join(output)
-            
-        except Exception as e:
-            return f"Error reading memory file: {str(e)}"
diff --git a/agent/memory/tools/memory_search.py b/agent/memory/tools/memory_search.py
deleted file mode 100644
index 1cfda07..0000000
--- a/agent/memory/tools/memory_search.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""
-Memory search tool
-
-Allows agents to search their memory using semantic and keyword search
-"""
-
-from typing import Dict, Any, Optional
-from agent.tools.base_tool import BaseTool
-from agent.memory.manager import MemoryManager
-
-
-class MemorySearchTool(BaseTool):
-    """Tool for searching agent memory"""
-    
-    def __init__(self, memory_manager: MemoryManager, user_id: Optional[str] = None):
-        """
-        Initialize memory search tool
-        
-        Args:
-            memory_manager: MemoryManager instance
-            user_id: Optional user ID for scoped search
-        """
-        super().__init__()
-        self.memory_manager = memory_manager
-        self.user_id = user_id
-        self._name = "memory_search"
-        self._description = (
-            "Search historical memory files (beyond today/yesterday) using semantic and keyword search. "
-            "Recent context (MEMORY.md + today + yesterday) is already loaded. "
-            "Use this ONLY for older dates, specific past events, or when current context lacks needed info."
-        )
-    
-    @property
-    def name(self) -> str:
-        return self._name
-    
-    @property
-    def description(self) -> str:
-        return self._description
-    
-    @property
-    def parameters(self) -> Dict[str, Any]:
-        return {
-            "type": "object",
-            "properties": {
-                "query": {
-                    "type": "string",
-                    "description": "Search query (can be natural language question or keywords)"
-                },
-                "max_results": {
-                    "type": "integer",
-                    "description": "Maximum number of results to return (default: 10)",
-                    "default": 10
-                },
-                "min_score": {
-                    "type": "number",
-                    "description": "Minimum relevance score (0-1, default: 0.3)",
-                    "default": 0.3
-                }
-            },
-            "required": ["query"]
-        }
-    
-    async def execute(self, **kwargs) -> str:
-        """
-        Execute memory search
-        
-        Args:
-            query: Search query
-            max_results: Maximum results
-            min_score: Minimum score
-            
-        Returns:
-            Formatted search results
-        """
-        query = kwargs.get("query")
-        max_results = kwargs.get("max_results", 10)
-        min_score = kwargs.get("min_score", 0.3)
-        
-        if not query:
-            return "Error: query parameter is required"
-        
-        try:
-            results = await self.memory_manager.search(
-                query=query,
-                user_id=self.user_id,
-                max_results=max_results,
-                min_score=min_score,
-                include_shared=True
-            )
-            
-            if not results:
-                return f"No relevant memories found for query: {query}"
-            
-            # Format results
-            output = [f"Found {len(results)} relevant memories:\n"]
-            
-            for i, result in enumerate(results, 1):
-                output.append(f"\n{i}. {result.path} (lines {result.start_line}-{result.end_line})")
-                output.append(f"   Score: {result.score:.3f}")
-                output.append(f"   Snippet: {result.snippet}")
-            
-            return "\n".join(output)
-            
-        except Exception as e:
-            return f"Error searching memory: {str(e)}"
diff --git a/agent/tools/edit/edit.py b/agent/tools/edit/edit.py
index 54f7529..566309b 100644
--- a/agent/tools/edit/edit.py
+++ b/agent/tools/edit/edit.py
@@ -46,6 +46,7 @@ class Edit(BaseTool):
     def __init__(self, config: dict = None):
         self.config = config or {}
         self.cwd = self.config.get("cwd", os.getcwd())
+        self.memory_manager = self.config.get("memory_manager", None)
     
     def execute(self, args: Dict[str, Any]) -> ToolResult:
         """
@@ -141,6 +142,14 @@ class Edit(BaseTool):
                 "first_changed_line": diff_result['first_changed_line']
             }
             
+            # Notify memory manager if file is in memory directory
+            if self.memory_manager and "memory/" in path:
+                try:
+                    self.memory_manager.mark_dirty()
+                except Exception as e:
+                    # Don't fail the edit if memory notification fails
+                    pass
+            
             return ToolResult.success(result)
             
         except UnicodeDecodeError:
diff --git a/agent/tools/memory/memory_get.py b/agent/tools/memory/memory_get.py
index 0ad1cbd..8abb207 100644
--- a/agent/tools/memory/memory_get.py
+++ b/agent/tools/memory/memory_get.py
@@ -22,7 +22,7 @@ class MemoryGetTool(BaseTool):
         "properties": {
             "path": {
                 "type": "string",
-                "description": "Relative path to the memory file (e.g., 'MEMORY.md', 'memory/2024-01-29.md')"
+                "description": "Relative path to the memory file (e.g., 'memory/MEMORY.md', 'memory/2024-01-29.md')"
             },
             "start_line": {
                 "type": "integer",
@@ -68,6 +68,11 @@ class MemoryGetTool(BaseTool):
         
         try:
             workspace_dir = self.memory_manager.config.get_workspace()
+            
+            # Auto-prepend memory/ if not present and not absolute path
+            if not path.startswith('memory/') and not path.startswith('/'):
+                path = f'memory/{path}'
+            
             file_path = workspace_dir / path
             
             if not file_path.exists():
diff --git a/agent/tools/memory/memory_search.py b/agent/tools/memory/memory_search.py
index e854d02..6479856 100644
--- a/agent/tools/memory/memory_search.py
+++ b/agent/tools/memory/memory_search.py
@@ -30,8 +30,8 @@ class MemorySearchTool(BaseTool):
             },
             "min_score": {
                 "type": "number",
-                "description": "Minimum relevance score (0-1, default: 0.3)",
-                "default": 0.3
+                "description": "Minimum relevance score (0-1, default: 0.1)",
+                "default": 0.1
             }
         },
         "required": ["query"]
@@ -64,7 +64,7 @@ class MemorySearchTool(BaseTool):
         
         query = args.get("query")
         max_results = args.get("max_results", 10)
-        min_score = args.get("min_score", 0.3)
+        min_score = args.get("min_score", 0.1)
         
         if not query:
             return ToolResult.fail("Error: query parameter is required")
diff --git a/agent/tools/write/write.py b/agent/tools/write/write.py
index a246040..9836564 100644
--- a/agent/tools/write/write.py
+++ b/agent/tools/write/write.py
@@ -34,6 +34,7 @@ class Write(BaseTool):
     def __init__(self, config: dict = None):
         self.config = config or {}
         self.cwd = self.config.get("cwd", os.getcwd())
+        self.memory_manager = self.config.get("memory_manager", None)
     
     def execute(self, args: Dict[str, Any]) -> ToolResult:
         """
@@ -64,6 +65,10 @@ class Write(BaseTool):
             # Get bytes written
             bytes_written = len(content.encode('utf-8'))
             
+            # Auto-sync to memory database if this is a memory file
+            if self.memory_manager and 'memory/' in path:
+                self.memory_manager.mark_dirty()
+            
             result = {
                 "message": f"Successfully wrote {bytes_written} bytes to {path}",
                 "path": path,
diff --git a/memory/2026-01-29.md b/memory/2026-01-29.md
deleted file mode 100644
index 44c18e3..0000000
--- a/memory/2026-01-29.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# 2026-01-29 记录
-
-## 老王的重要决定
-- 今天老王告诉我他决定要学AI了，这是一个重要的决策
-- 这可能会是他学习和职业发展的一个转折点
\ No newline at end of file
diff --git a/memory/MEMORY.md b/memory/MEMORY.md
deleted file mode 100644
index d80e5cd..0000000
--- a/memory/MEMORY.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Memory
-
-Long-term curated memories and preferences.
-
-## 用户信息
-- 用户名：老王
-
-## 用户信息
-- 用户名：老王
-
-## 用户偏好
-- 喜欢吃红烧肉
-- 爱打篮球
-
-## 重要决策
-- 决定要学习AI（2026-01-29）
-
-## Notes
-
-- Important decisions and facts go here
-- This is your long-term knowledge base
\ No newline at end of file