fix: long-term memory bug

This commit is contained in:
saboteur7
2026-01-30 11:31:13 +08:00
parent bb850bb6c5
commit 5a466d0ff6
12 changed files with 202 additions and 295 deletions

View File

@@ -28,7 +28,7 @@ class MemoryConfig:
# Search config
max_results: int = 10
min_score: float = 0.3
min_score: float = 0.1
# Hybrid search weights
vector_weight: float = 0.7

View File

@@ -213,7 +213,6 @@ class MemoryManager:
memory_chunks.append(MemoryChunk(
id=chunk_id,
agent_id="default",
user_id=user_id,
scope=scope,
source=source,
@@ -330,7 +329,6 @@ class MemoryManager:
memory_chunks.append(MemoryChunk(
id=chunk_id,
agent_id="default",
user_id=user_id,
scope=scope,
source=source,
@@ -428,7 +426,7 @@ class MemoryManager:
return success
def build_memory_guidance(self, lang: str = "en", include_context: bool = True) -> str:
def build_memory_guidance(self, lang: str = "zh", include_context: bool = True) -> str:
"""
Build natural memory guidance for agent system prompt
@@ -450,7 +448,7 @@ class MemoryManager:
if lang == "zh":
guidance = f"""## 记忆召回
回答关于过去工作、决策、日期、人物、偏好或待办事项的问题前:先用 memory_search 搜索 MEMORY.md + memory/*.md然后用 memory_get 只读取需要的行。如果搜索后仍不确定,说明你已检查过
下方"背景知识"包含你的核心长期记忆,可直接使用。如果背景知识中没有相关信息,再用 memory_search 搜索历史记录memory/*.md 日期文件)
## 记忆存储
当用户分享持久偏好、决策或重要事实时(无论是否明确要求"记住"),主动存储:
@@ -465,7 +463,7 @@ class MemoryManager:
- 自然使用记忆,就像你本来就知道这些信息"""
else:
guidance = f"""## Memory Recall
Before answering anything about prior work, decisions, dates, people, preferences, or todos: run memory_search on MEMORY.md + memory/*.md; then use memory_get to pull only the needed lines. If low confidence after search, say you checked.
"Background Knowledge" below contains your core long-term memories - use them directly. If information is not in Background Knowledge, use memory_search to search, then use memory_get to read files (path format: memory/MEMORY.md, memory/2026-01-30.md).
## Memory Storage
When user shares durable preferences, decisions, or important facts (whether or not they explicitly say "remember"), proactively store:

View File

@@ -50,11 +50,45 @@ class MemoryStorage:
def _init_db(self):
"""Initialize database with schema"""
self.conn = sqlite3.connect(str(self.db_path))
self.conn.row_factory = sqlite3.Row
# Enable JSON support
self.conn.execute("PRAGMA journal_mode=WAL")
try:
self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
self.conn.row_factory = sqlite3.Row
# Check database integrity
try:
result = self.conn.execute("PRAGMA integrity_check").fetchone()
if result[0] != 'ok':
print(f"⚠️ Database integrity check failed: {result[0]}")
print(f" Recreating database...")
self.conn.close()
self.conn = None
# Remove corrupted database
self.db_path.unlink(missing_ok=True)
# Remove WAL files
Path(str(self.db_path) + '-wal').unlink(missing_ok=True)
Path(str(self.db_path) + '-shm').unlink(missing_ok=True)
# Reconnect to create new database
self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
self.conn.row_factory = sqlite3.Row
except sqlite3.DatabaseError:
# Database is corrupted, recreate it
print(f"⚠️ Database is corrupted, recreating...")
if self.conn:
self.conn.close()
self.conn = None
self.db_path.unlink(missing_ok=True)
Path(str(self.db_path) + '-wal').unlink(missing_ok=True)
Path(str(self.db_path) + '-shm').unlink(missing_ok=True)
self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
self.conn.row_factory = sqlite3.Row
# Enable WAL mode for better concurrency
self.conn.execute("PRAGMA journal_mode=WAL")
# Set busy timeout to avoid "database is locked" errors
self.conn.execute("PRAGMA busy_timeout=5000")
except Exception as e:
print(f"⚠️ Unexpected error during database initialization: {e}")
raise
# Create chunks table with embeddings
self.conn.execute("""
@@ -92,6 +126,8 @@ class MemoryStorage:
""")
# Create FTS5 virtual table for keyword search
# Use default unicode61 tokenizer (stable and compatible)
# For CJK support, we'll use LIKE queries as fallback
self.conn.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
text,
@@ -261,13 +297,37 @@ class MemoryStorage:
scopes: List[str] = None,
limit: int = 10
) -> List[SearchResult]:
"""Keyword search using FTS5"""
"""
Keyword search using FTS5 + LIKE fallback
Strategy:
1. Try FTS5 search first (good for English and word-based languages)
2. If no results and query contains CJK characters, use LIKE search
"""
if scopes is None:
scopes = ["shared"]
if user_id:
scopes.append("user")
# Build FTS query
# Try FTS5 search first
fts_results = self._search_fts5(query, user_id, scopes, limit)
if fts_results:
return fts_results
# Fallback to LIKE search for CJK characters
if MemoryStorage._contains_cjk(query):
return self._search_like(query, user_id, scopes, limit)
return []
def _search_fts5(
self,
query: str,
user_id: Optional[str],
scopes: List[str],
limit: int
) -> List[SearchResult]:
"""FTS5 full-text search"""
fts_query = self._build_fts_query(query)
if not fts_query:
return []
@@ -299,20 +359,83 @@ class MemoryStorage:
"""
params.append(limit)
rows = self.conn.execute(sql_query, params).fetchall()
try:
rows = self.conn.execute(sql_query, params).fetchall()
return [
SearchResult(
path=row['path'],
start_line=row['start_line'],
end_line=row['end_line'],
score=self._bm25_rank_to_score(row['rank']),
snippet=self._truncate_text(row['text'], 500),
source=row['source'],
user_id=row['user_id']
)
for row in rows
]
except Exception:
return []
def _search_like(
self,
query: str,
user_id: Optional[str],
scopes: List[str],
limit: int
) -> List[SearchResult]:
"""LIKE-based search for CJK characters"""
import re
# Extract CJK words (2+ characters)
cjk_words = re.findall(r'[\u4e00-\u9fff]{2,}', query)
if not cjk_words:
return []
return [
SearchResult(
path=row['path'],
start_line=row['start_line'],
end_line=row['end_line'],
score=self._bm25_rank_to_score(row['rank']),
snippet=self._truncate_text(row['text'], 500),
source=row['source'],
user_id=row['user_id']
)
for row in rows
]
scope_placeholders = ','.join('?' * len(scopes))
# Build LIKE conditions for each word
like_conditions = []
params = []
for word in cjk_words:
like_conditions.append("text LIKE ?")
params.append(f'%{word}%')
where_clause = ' OR '.join(like_conditions)
params.extend(scopes)
if user_id:
sql_query = f"""
SELECT * FROM chunks
WHERE ({where_clause})
AND scope IN ({scope_placeholders})
AND (scope = 'shared' OR user_id = ?)
LIMIT ?
"""
params.extend([user_id, limit])
else:
sql_query = f"""
SELECT * FROM chunks
WHERE ({where_clause})
AND scope IN ({scope_placeholders})
LIMIT ?
"""
params.append(limit)
try:
rows = self.conn.execute(sql_query, params).fetchall()
return [
SearchResult(
path=row['path'],
start_line=row['start_line'],
end_line=row['end_line'],
score=0.5, # Fixed score for LIKE search
snippet=self._truncate_text(row['text'], 500),
source=row['source'],
user_id=row['user_id']
)
for row in rows
]
except Exception:
return []
def delete_by_path(self, path: str):
"""Delete all chunks from a file"""
@@ -354,7 +477,19 @@ class MemoryStorage:
def close(self):
"""Close database connection"""
if self.conn:
self.conn.close()
try:
self.conn.commit() # Ensure all changes are committed
self.conn.close()
self.conn = None # Mark as closed
except Exception as e:
print(f"⚠️ Error closing database connection: {e}")
def __del__(self):
    """
    Destructor: make a best-effort attempt to close the database connection.

    Delegates to ``self.close()``. Any ordinary error is swallowed on purpose,
    because a finalizer has no caller that could handle it.
    """
    try:
        self.close()
    except Exception:
        # Deliberately ignored: cleanup is best-effort here.
        # NOTE(review): presumably this mainly guards against close() running
        # on a partially initialised object - confirm against __init__.
        # `except Exception` (instead of a bare `except:`) keeps
        # KeyboardInterrupt / SystemExit from being silently discarded.
        pass
# Helper methods
@@ -390,14 +525,29 @@ class MemoryStorage:
return dot_product / (norm1 * norm2)
@staticmethod
def _build_fts_query(raw_query: str) -> Optional[str]:
"""Build FTS5 query from raw text"""
def _contains_cjk(text: str) -> bool:
"""Check if text contains CJK (Chinese/Japanese/Korean) characters"""
import re
tokens = re.findall(r'[A-Za-z0-9_\u4e00-\u9fff]+', raw_query)
return bool(re.search(r'[\u4e00-\u9fff]', text))
@staticmethod
def _build_fts_query(raw_query: str) -> Optional[str]:
"""
Build FTS5 query from raw text
Works best for English and word-based languages.
For CJK characters, LIKE search will be used as fallback.
"""
import re
# Extract words (primarily English words and numbers)
tokens = re.findall(r'[A-Za-z0-9_]+', raw_query)
if not tokens:
return None
# Quote tokens for exact matching
quoted = [f'"{t}"' for t in tokens]
return ' AND '.join(quoted)
# Use OR for more flexible matching
return ' OR '.join(quoted)
@staticmethod
def _bm25_rank_to_score(rank: float) -> float:

View File

@@ -1,10 +0,0 @@
"""
Memory tools for AgentMesh
Provides memory_search and memory_get tools for agents
"""
from agent.memory.tools.memory_search import MemorySearchTool
from agent.memory.tools.memory_get import MemoryGetTool
__all__ = ['MemorySearchTool', 'MemoryGetTool']

View File

@@ -1,118 +0,0 @@
"""
Memory get tool
Allows agents to read specific sections from memory files
"""
from typing import Dict, Any, Optional
from pathlib import Path
from agent.tools.base_tool import BaseTool
from agent.memory.manager import MemoryManager
class MemoryGetTool(BaseTool):
    """
    Tool for reading memory file contents.

    Resolves the requested path against the workspace directory returned by
    ``memory_manager.config.get_workspace()`` and returns a line range of the
    file, prefixed with a small metadata header.
    """

    def __init__(self, memory_manager: MemoryManager):
        """
        Initialize memory get tool

        Args:
            memory_manager: MemoryManager instance
        """
        super().__init__()
        self.memory_manager = memory_manager
        self._name = "memory_get"
        self._description = (
            "Read specific memory file content by path and line range. "
            "Use after memory_search to get full context from historical memory files."
        )

    @property
    def name(self) -> str:
        """Tool name exposed to the agent."""
        return self._name

    @property
    def description(self) -> str:
        """Human/LLM readable description of what the tool does."""
        return self._description

    @property
    def parameters(self) -> Dict[str, Any]:
        """JSON-schema style description of the accepted arguments."""
        return {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Relative path to the memory file (e.g., 'MEMORY.md', 'memory/2024-01-29.md')"
                },
                "start_line": {
                    "type": "integer",
                    "description": "Starting line number (optional, default: 1)",
                    "default": 1
                },
                "num_lines": {
                    "type": "integer",
                    "description": "Number of lines to read (optional, reads all if not specified)"
                }
            },
            "required": ["path"]
        }

    async def execute(self, **kwargs) -> str:
        """
        Execute memory file read

        Args:
            path: File path, joined onto the workspace directory
            start_line: 1-based first line to return (values below 1, or a
                missing/None value, are treated as 1)
            num_lines: Number of lines to return; when omitted (or falsy) the
                rest of the file is returned

        Returns:
            A header ("File: ...", "Lines: a-b (total: n)") followed by the
            selected lines, or a string starting with "Error" on failure.
            This method does not raise; every exception is converted into an
            error string.
        """
        path = kwargs.get("path")
        # `or 1` also covers an explicit start_line=None, which would otherwise
        # fail the `< 1` comparison below with a TypeError.
        start_line = kwargs.get("start_line") or 1
        num_lines = kwargs.get("num_lines")

        if not path:
            return "Error: path parameter is required"

        try:
            workspace_dir = self.memory_manager.config.get_workspace()
            file_path = workspace_dir / path

            if not file_path.exists():
                return f"Error: File not found: {path}"

            # Decode explicitly as UTF-8 rather than the platform default
            # encoding, so non-ASCII (e.g. CJK) memory content reads the same
            # on every OS.
            # NOTE(review): assumes memory files are written as UTF-8 - confirm
            # against the writer side.
            content = file_path.read_text(encoding="utf-8")
            lines = content.split('\n')
            total_lines = len(lines)

            # Handle line range
            if start_line < 1:
                start_line = 1
            start_idx = start_line - 1

            # Without this guard the header would read e.g. "Lines: 50-49".
            if start_idx >= total_lines:
                return (
                    f"Error: start_line {start_line} exceeds file length "
                    f"({total_lines} lines): {path}"
                )

            if num_lines:
                end_idx = start_idx + num_lines
                selected_lines = lines[start_idx:end_idx]
            else:
                selected_lines = lines[start_idx:]

            result = '\n'.join(selected_lines)

            # Add metadata
            shown_lines = len(selected_lines)
            output = [
                f"File: {path}",
                f"Lines: {start_line}-{start_line + shown_lines - 1} (total: {total_lines})",
                "",
                result
            ]
            return '\n'.join(output)
        except Exception as e:
            # Tool boundary: report the failure to the agent instead of raising.
            return f"Error reading memory file: {str(e)}"

View File

@@ -1,106 +0,0 @@
"""
Memory search tool
Allows agents to search their memory using semantic and keyword search
"""
from typing import Dict, Any, Optional
from agent.tools.base_tool import BaseTool
from agent.memory.manager import MemoryManager
class MemorySearchTool(BaseTool):
    """Agent-facing tool that queries stored memories through a MemoryManager."""

    def __init__(self, memory_manager: MemoryManager, user_id: Optional[str] = None):
        """
        Set up the search tool.

        Args:
            memory_manager: MemoryManager instance that performs the search
            user_id: Optional user ID passed through to scope the search
        """
        super().__init__()
        self.memory_manager = memory_manager
        self.user_id = user_id
        self._name = "memory_search"
        # Sentences are joined with a single space, giving one description line.
        self._description = " ".join((
            "Search historical memory files (beyond today/yesterday) using semantic and keyword search.",
            "Recent context (MEMORY.md + today + yesterday) is already loaded.",
            "Use this ONLY for older dates, specific past events, or when current context lacks needed info.",
        ))

    @property
    def name(self) -> str:
        """Tool name exposed to the agent."""
        return self._name

    @property
    def description(self) -> str:
        """Description text exposed to the agent."""
        return self._description

    @property
    def parameters(self) -> Dict[str, Any]:
        """JSON-schema style description of the accepted arguments."""
        query_spec = {
            "type": "string",
            "description": "Search query (can be natural language question or keywords)"
        }
        max_results_spec = {
            "type": "integer",
            "description": "Maximum number of results to return (default: 10)",
            "default": 10
        }
        min_score_spec = {
            "type": "number",
            "description": "Minimum relevance score (0-1, default: 0.3)",
            "default": 0.3
        }
        return {
            "type": "object",
            "properties": {
                "query": query_spec,
                "max_results": max_results_spec,
                "min_score": min_score_spec
            },
            "required": ["query"]
        }

    async def execute(self, **kwargs) -> str:
        """
        Run a memory search and format the hits as text.

        Args:
            query: Search query (required)
            max_results: Maximum results (default 10)
            min_score: Minimum score (default 0.3)

        Returns:
            A numbered list of hits with path, line range, score and snippet;
            a "No relevant memories found" message when the search returns
            nothing; or a string starting with "Error" on failure.
        """
        query = kwargs.get("query")
        if not query:
            return "Error: query parameter is required"

        max_results = kwargs.get("max_results", 10)
        min_score = kwargs.get("min_score", 0.3)

        try:
            hits = await self.memory_manager.search(
                query=query,
                user_id=self.user_id,
                max_results=max_results,
                min_score=min_score,
                include_shared=True
            )
            if not hits:
                return f"No relevant memories found for query: {query}"

            # Formatting stays inside the try so a malformed hit is reported
            # as an error string rather than raised.
            report = [f"Found {len(hits)} relevant memories:\n"]
            for rank, hit in enumerate(hits, start=1):
                report.extend((
                    f"\n{rank}. {hit.path} (lines {hit.start_line}-{hit.end_line})",
                    f" Score: {hit.score:.3f}",
                    f" Snippet: {hit.snippet}",
                ))
            return "\n".join(report)
        except Exception as exc:
            return f"Error searching memory: {exc}"

View File

@@ -46,6 +46,7 @@ class Edit(BaseTool):
def __init__(self, config: dict = None):
self.config = config or {}
self.cwd = self.config.get("cwd", os.getcwd())
self.memory_manager = self.config.get("memory_manager", None)
def execute(self, args: Dict[str, Any]) -> ToolResult:
"""
@@ -141,6 +142,14 @@ class Edit(BaseTool):
"first_changed_line": diff_result['first_changed_line']
}
# Notify memory manager if file is in memory directory
if self.memory_manager and "memory/" in path:
try:
self.memory_manager.mark_dirty()
except Exception as e:
# Don't fail the edit if memory notification fails
pass
return ToolResult.success(result)
except UnicodeDecodeError:

View File

@@ -22,7 +22,7 @@ class MemoryGetTool(BaseTool):
"properties": {
"path": {
"type": "string",
"description": "Relative path to the memory file (e.g., 'MEMORY.md', 'memory/2024-01-29.md')"
"description": "Relative path to the memory file (e.g., 'memory/MEMORY.md', 'memory/2024-01-29.md')"
},
"start_line": {
"type": "integer",
@@ -68,6 +68,11 @@ class MemoryGetTool(BaseTool):
try:
workspace_dir = self.memory_manager.config.get_workspace()
# Auto-prepend memory/ if not present and not absolute path
if not path.startswith('memory/') and not path.startswith('/'):
path = f'memory/{path}'
file_path = workspace_dir / path
if not file_path.exists():

View File

@@ -30,8 +30,8 @@ class MemorySearchTool(BaseTool):
},
"min_score": {
"type": "number",
"description": "Minimum relevance score (0-1, default: 0.3)",
"default": 0.3
"description": "Minimum relevance score (0-1, default: 0.1)",
"default": 0.1
}
},
"required": ["query"]
@@ -64,7 +64,7 @@ class MemorySearchTool(BaseTool):
query = args.get("query")
max_results = args.get("max_results", 10)
min_score = args.get("min_score", 0.3)
min_score = args.get("min_score", 0.1)
if not query:
return ToolResult.fail("Error: query parameter is required")

View File

@@ -34,6 +34,7 @@ class Write(BaseTool):
def __init__(self, config: dict = None):
self.config = config or {}
self.cwd = self.config.get("cwd", os.getcwd())
self.memory_manager = self.config.get("memory_manager", None)
def execute(self, args: Dict[str, Any]) -> ToolResult:
"""
@@ -64,6 +65,10 @@ class Write(BaseTool):
# Get bytes written
bytes_written = len(content.encode('utf-8'))
# Auto-sync to memory database if this is a memory file
if self.memory_manager and 'memory/' in path:
self.memory_manager.mark_dirty()
result = {
"message": f"Successfully wrote {bytes_written} bytes to {path}",
"path": path,

View File

@@ -1,5 +0,0 @@
# 2026-01-29 记录
## 老王的重要决定
- 今天老王告诉我他决定要学AI了这是一个重要的决策
- 这可能会是他学习和职业发展的一个转折点

View File

@@ -1,21 +0,0 @@
# Memory
Long-term curated memories and preferences.
## 用户信息
- 用户名:老王
## 用户信息
- 用户名:老王
## 用户偏好
- 喜欢吃红烧肉
- 爱打篮球
## 重要决策
- 决定要学习AI2026-01-29
## Notes
- Important decisions and facts go here
- This is your long-term knowledge base