fix(memory): address critical bugs from multi-agent review

Bug Fixes: - Remove singleton pattern from consolidation/reflection services to prevent stale database session bugs (session is now passed per-request) - Add LRU eviction to MemoryToolService._working dict (max 1000 sessions) to prevent unbounded memory growth - Replace O(n) list.remove() with O(1) OrderedDict.move_to_end() in RetrievalCache for better performance under load - Use deque with maxlen for metrics histograms to prevent unbounded memory growth (circular buffer with 10k max samples) - Use full UUID for checkpoint IDs instead of 8-char prefix to avoid collision risk at scale (an 8-hex-char prefix is only 32 bits, so the birthday bound gives roughly a 25% collision chance at ~50k checkpoints) Test Updates: - Update checkpoint test to expect 36-char UUID - Update reflection singleton tests to expect new factory behavior - Add reset_memory_reflection() no-op for backwards compatibility 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 18:55:32 +01:00
parent 35aea2d73a
commit 3edce9cd26
8 changed files with 86 additions and 78 deletions
--- a/backend/app/services/memory/mcp/service.py
+++ b/backend/app/services/memory/mcp/service.py
@@ -7,6 +7,7 @@ All tools are scoped to project/agent context for proper isolation.
 """

 import logging
+from collections import OrderedDict
 from dataclasses import dataclass
 from datetime import UTC, datetime, timedelta
 from typing import Any
@@ -83,6 +84,9 @@ class MemoryToolService:
    This service coordinates between different memory types.
    """

+    # Maximum number of working memory sessions to cache (LRU eviction)
+    MAX_WORKING_SESSIONS = 1000
+
    def __init__(
        self,
        session: AsyncSession,
@@ -98,8 +102,8 @@ class MemoryToolService:
        self._session = session
        self._embedding_generator = embedding_generator

-        # Lazy-initialized memory services
-        self._working: dict[str, WorkingMemory] = {}  # keyed by session_id
+        # Lazy-initialized memory services with LRU eviction for working memory
+        self._working: OrderedDict[str, WorkingMemory] = OrderedDict()
        self._episodic: EpisodicMemory | None = None
        self._semantic: SemanticMemory | None = None
        self._procedural: ProceduralMemory | None = None
@@ -110,14 +114,28 @@ class MemoryToolService:
        project_id: UUID | None = None,
        agent_instance_id: UUID | None = None,
    ) -> WorkingMemory:
-        """Get or create working memory for a session."""
-        if session_id not in self._working:
-            self._working[session_id] = await WorkingMemory.for_session(
-                session_id=session_id,
-                project_id=str(project_id) if project_id else None,
-                agent_instance_id=str(agent_instance_id) if agent_instance_id else None,
-            )
-        return self._working[session_id]
+        """Get or create working memory for a session with LRU eviction."""
+        if session_id in self._working:
+            # Move to end (most recently used)
+            self._working.move_to_end(session_id)
+            return self._working[session_id]
+
+        # Evict oldest entries if at capacity
+        while len(self._working) >= self.MAX_WORKING_SESSIONS:
+            oldest_id, oldest_memory = self._working.popitem(last=False)
+            try:
+                await oldest_memory.close()
+            except Exception as e:
+                logger.warning(f"Error closing evicted working memory {oldest_id}: {e}")
+
+        # Create new working memory
+        working = await WorkingMemory.for_session(
+            session_id=session_id,
+            project_id=str(project_id) if project_id else None,
+            agent_instance_id=str(agent_instance_id) if agent_instance_id else None,
+        )
+        self._working[session_id] = working
+        return working

    async def _get_episodic(self) -> EpisodicMemory:
        """Get or create episodic memory service."""