fix(memory): address critical bugs from multi-agent review

Bug Fixes:
- Remove singleton pattern from consolidation/reflection services to
  prevent stale database session bugs (session is now passed per-request)
- Add LRU eviction to MemoryToolService._working dict (max 1000 sessions)
  to prevent unbounded memory growth
- Replace O(n) list.remove() / list.pop(0) LRU bookkeeping with O(1)
  OrderedDict.move_to_end() / popitem(last=False) in RetrievalCache for
  better performance under load
- Use deque with maxlen for metrics histograms to prevent unbounded
  memory growth (circular buffer with 10k max samples)
- Use full UUID for checkpoint IDs instead of 8-char prefix to avoid
  collision risk at scale (an 8-hex-char prefix is only 32 bits, so the
  birthday bound gives a ~25% collision chance by ~50k checkpoints)

Test Updates:
- Update checkpoint test to expect 36-char UUID
- Update reflection singleton tests to expect new factory behavior
- Add reset_memory_reflection() no-op for backwards compatibility

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-05 18:55:32 +01:00
parent 35aea2d73a
commit 3edce9cd26
8 changed files with 86 additions and 78 deletions

View File

@@ -13,6 +13,7 @@ Provides hybrid retrieval capabilities combining:
import hashlib
import logging
from collections import OrderedDict
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Any, TypeVar
@@ -243,7 +244,8 @@ class RetrievalCache:
"""
In-memory cache for retrieval results.
Supports TTL-based expiration and LRU eviction.
Supports TTL-based expiration and LRU eviction with O(1) operations.
Uses OrderedDict for efficient LRU tracking.
"""
def __init__(
@@ -258,10 +260,10 @@ class RetrievalCache:
max_entries: Maximum cache entries
default_ttl_seconds: Default TTL for entries
"""
self._cache: dict[str, CacheEntry] = {}
# OrderedDict maintains insertion order; we use move_to_end for O(1) LRU
self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
self._max_entries = max_entries
self._default_ttl = default_ttl_seconds
self._access_order: list[str] = []
logger.info(
f"Initialized RetrievalCache with max_entries={max_entries}, "
f"ttl={default_ttl_seconds}s"
@@ -283,14 +285,10 @@ class RetrievalCache:
entry = self._cache[query_key]
if entry.is_expired():
del self._cache[query_key]
if query_key in self._access_order:
self._access_order.remove(query_key)
return None
# Update access order (LRU)
if query_key in self._access_order:
self._access_order.remove(query_key)
self._access_order.append(query_key)
# Update access order (LRU) - O(1) with OrderedDict
self._cache.move_to_end(query_key)
logger.debug(f"Cache hit for {query_key}")
return entry.results
@@ -309,11 +307,9 @@ class RetrievalCache:
results: Results to cache
ttl_seconds: TTL for this entry (or default)
"""
# Evict if at capacity
while len(self._cache) >= self._max_entries and self._access_order:
oldest_key = self._access_order.pop(0)
if oldest_key in self._cache:
del self._cache[oldest_key]
# Evict oldest entries if at capacity - O(1) with popitem(last=False)
while len(self._cache) >= self._max_entries:
self._cache.popitem(last=False)
entry = CacheEntry(
results=results,
@@ -323,7 +319,6 @@ class RetrievalCache:
)
self._cache[query_key] = entry
self._access_order.append(query_key)
logger.debug(f"Cached {len(results)} results for {query_key}")
def invalidate(self, query_key: str) -> bool:
@@ -338,8 +333,6 @@ class RetrievalCache:
"""
if query_key in self._cache:
del self._cache[query_key]
if query_key in self._access_order:
self._access_order.remove(query_key)
return True
return False
@@ -376,7 +369,6 @@ class RetrievalCache:
"""
count = len(self._cache)
self._cache.clear()
self._access_order.clear()
logger.info(f"Cleared {count} cache entries")
return count