fix(memory): address critical bugs from multi-agent review

Bug Fixes:
- Remove singleton pattern from consolidation/reflection services to
  prevent stale database session bugs (session is now passed per-request)
- Add LRU eviction to MemoryToolService._working dict (max 1000 sessions)
  to prevent unbounded memory growth
- Replace O(n) list.remove() with O(1) OrderedDict.move_to_end() in
  RetrievalCache for better performance under load
- Use deque with maxlen for metrics histograms to prevent unbounded
  memory growth (circular buffer with 10k max samples)
- Use full UUID for checkpoint IDs instead of 8-char prefix to avoid
  collision risk at scale (an 8-hex-char prefix is only 32 bits, so the
  birthday paradox gives a ~25% collision chance at ~50k checkpoints)

Test Updates:
- Update checkpoint test to expect 36-char UUID
- Update reflection singleton tests to expect new factory behavior
- Add reset_memory_reflection() no-op for backwards compatibility

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-05 18:55:32 +01:00
parent 35aea2d73a
commit 3edce9cd26
8 changed files with 86 additions and 78 deletions

View File

@@ -7,6 +7,7 @@ All tools are scoped to project/agent context for proper isolation.
"""
import logging
from collections import OrderedDict
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from typing import Any
@@ -83,6 +84,9 @@ class MemoryToolService:
This service coordinates between different memory types.
"""
# Maximum number of working memory sessions to cache (LRU eviction)
MAX_WORKING_SESSIONS = 1000
def __init__(
self,
session: AsyncSession,
@@ -98,8 +102,8 @@ class MemoryToolService:
self._session = session
self._embedding_generator = embedding_generator
# Lazy-initialized memory services
self._working: dict[str, WorkingMemory] = {} # keyed by session_id
# Lazy-initialized memory services with LRU eviction for working memory
self._working: OrderedDict[str, WorkingMemory] = OrderedDict()
self._episodic: EpisodicMemory | None = None
self._semantic: SemanticMemory | None = None
self._procedural: ProceduralMemory | None = None
@@ -110,14 +114,28 @@ class MemoryToolService:
project_id: UUID | None = None,
agent_instance_id: UUID | None = None,
) -> WorkingMemory:
"""Get or create working memory for a session."""
if session_id not in self._working:
self._working[session_id] = await WorkingMemory.for_session(
session_id=session_id,
project_id=str(project_id) if project_id else None,
agent_instance_id=str(agent_instance_id) if agent_instance_id else None,
)
return self._working[session_id]
"""Get or create working memory for a session with LRU eviction."""
if session_id in self._working:
# Move to end (most recently used)
self._working.move_to_end(session_id)
return self._working[session_id]
# Evict oldest entries if at capacity
while len(self._working) >= self.MAX_WORKING_SESSIONS:
oldest_id, oldest_memory = self._working.popitem(last=False)
try:
await oldest_memory.close()
except Exception as e:
logger.warning(f"Error closing evicted working memory {oldest_id}: {e}")
# Create new working memory
working = await WorkingMemory.for_session(
session_id=session_id,
project_id=str(project_id) if project_id else None,
agent_instance_id=str(agent_instance_id) if agent_instance_id else None,
)
self._working[session_id] = working
return working
async def _get_episodic(self) -> EpisodicMemory:
"""Get or create episodic memory service."""