feat(memory): implement caching layer for memory operations (#98)
Add comprehensive caching layer for the Agent Memory System:

- HotMemoryCache: LRU cache for frequently accessed memories
  - Python 3.12 type parameter syntax
  - Thread-safe operations with RLock
  - TTL-based expiration
  - Access count tracking for hot memory identification
  - Scoped invalidation by type, scope, or pattern
- EmbeddingCache: Cache embeddings by content hash
  - Content-hash based deduplication
  - Optional Redis backing for persistence
  - LRU eviction with configurable max size
  - CachedEmbeddingGenerator wrapper for transparent caching
- CacheManager: Unified cache management
  - Coordinates hot cache, embedding cache, and retrieval cache
  - Centralized invalidation across all caches
  - Aggregated statistics and hit rate tracking
  - Automatic cleanup scheduling
  - Cache warmup support

Performance targets:
- Cache hit rate > 80% for hot memories
- Cache operations < 1ms (memory), < 5ms (Redis)

83 new tests with comprehensive coverage.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
463
backend/app/services/memory/cache/hot_cache.py
vendored
Normal file
463
backend/app/services/memory/cache/hot_cache.py
vendored
Normal file
@@ -0,0 +1,463 @@
|
||||
# app/services/memory/cache/hot_cache.py
|
||||
"""
|
||||
Hot Memory Cache.
|
||||
|
||||
LRU cache for frequently accessed memories.
|
||||
Provides fast access to recently used memories without database queries.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    now = datetime.now(tz=UTC)
    return now
|
||||
|
||||
|
||||
@dataclass
|
||||
class CacheEntry[T]:
|
||||
"""A cached memory entry with metadata."""
|
||||
|
||||
value: T
|
||||
created_at: datetime
|
||||
last_accessed_at: datetime
|
||||
access_count: int = 1
|
||||
ttl_seconds: float = 300.0
|
||||
|
||||
def is_expired(self) -> bool:
|
||||
"""Check if this entry has expired."""
|
||||
age = (_utcnow() - self.created_at).total_seconds()
|
||||
return age > self.ttl_seconds
|
||||
|
||||
def touch(self) -> None:
|
||||
"""Update access time and count."""
|
||||
self.last_accessed_at = _utcnow()
|
||||
self.access_count += 1
|
||||
|
||||
|
||||
@dataclass
|
||||
class CacheKey:
|
||||
"""A structured cache key with components."""
|
||||
|
||||
memory_type: str
|
||||
memory_id: str
|
||||
scope: str | None = None
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.memory_type, self.memory_id, self.scope))
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
if not isinstance(other, CacheKey):
|
||||
return False
|
||||
return (
|
||||
self.memory_type == other.memory_type
|
||||
and self.memory_id == other.memory_id
|
||||
and self.scope == other.scope
|
||||
)
|
||||
|
||||
def __str__(self) -> str:
|
||||
if self.scope:
|
||||
return f"{self.memory_type}:{self.scope}:{self.memory_id}"
|
||||
return f"{self.memory_type}:{self.memory_id}"
|
||||
|
||||
|
||||
@dataclass
class HotCacheStats:
    """Counters describing hot-cache effectiveness."""

    hits: int = 0
    misses: int = 0
    evictions: int = 0
    expirations: int = 0
    current_size: int = 0
    max_size: int = 0

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups served from cache (0.0 before any lookup)."""
        lookups = self.hits + self.misses
        return self.hits / lookups if lookups else 0.0

    def to_dict(self) -> dict[str, Any]:
        """Serialize the counters (plus derived hit_rate) to a plain dict."""
        snapshot: dict[str, Any] = {
            name: getattr(self, name)
            for name in (
                "hits",
                "misses",
                "evictions",
                "expirations",
                "current_size",
                "max_size",
            )
        }
        snapshot["hit_rate"] = self.hit_rate
        return snapshot
|
||||
|
||||
|
||||
class HotMemoryCache[T]:
|
||||
"""
|
||||
LRU cache for frequently accessed memories.
|
||||
|
||||
Features:
|
||||
- LRU eviction when capacity is reached
|
||||
- TTL-based expiration
|
||||
- Access count tracking for hot memory identification
|
||||
- Thread-safe operations
|
||||
- Scoped invalidation
|
||||
|
||||
Performance targets:
|
||||
- Cache hit rate > 80% for hot memories
|
||||
- Get/put operations < 1ms
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
max_size: int = 10000,
|
||||
default_ttl_seconds: float = 300.0,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the hot memory cache.
|
||||
|
||||
Args:
|
||||
max_size: Maximum number of entries
|
||||
default_ttl_seconds: Default TTL for entries (5 minutes)
|
||||
"""
|
||||
self._max_size = max_size
|
||||
self._default_ttl = default_ttl_seconds
|
||||
self._cache: OrderedDict[CacheKey, CacheEntry[T]] = OrderedDict()
|
||||
self._lock = threading.RLock()
|
||||
self._stats = HotCacheStats(max_size=max_size)
|
||||
logger.info(
|
||||
f"Initialized HotMemoryCache with max_size={max_size}, "
|
||||
f"ttl={default_ttl_seconds}s"
|
||||
)
|
||||
|
||||
def get(self, key: CacheKey) -> T | None:
|
||||
"""
|
||||
Get a memory from cache.
|
||||
|
||||
Args:
|
||||
key: Cache key
|
||||
|
||||
Returns:
|
||||
Cached value or None if not found/expired
|
||||
"""
|
||||
with self._lock:
|
||||
if key not in self._cache:
|
||||
self._stats.misses += 1
|
||||
return None
|
||||
|
||||
entry = self._cache[key]
|
||||
|
||||
# Check expiration
|
||||
if entry.is_expired():
|
||||
del self._cache[key]
|
||||
self._stats.expirations += 1
|
||||
self._stats.misses += 1
|
||||
self._stats.current_size = len(self._cache)
|
||||
return None
|
||||
|
||||
# Move to end (most recently used)
|
||||
self._cache.move_to_end(key)
|
||||
entry.touch()
|
||||
|
||||
self._stats.hits += 1
|
||||
return entry.value
|
||||
|
||||
def get_by_id(
|
||||
self,
|
||||
memory_type: str,
|
||||
memory_id: UUID | str,
|
||||
scope: str | None = None,
|
||||
) -> T | None:
|
||||
"""
|
||||
Get a memory by type and ID.
|
||||
|
||||
Args:
|
||||
memory_type: Type of memory (episodic, semantic, procedural)
|
||||
memory_id: Memory ID
|
||||
scope: Optional scope (project_id, agent_id)
|
||||
|
||||
Returns:
|
||||
Cached value or None if not found/expired
|
||||
"""
|
||||
key = CacheKey(
|
||||
memory_type=memory_type,
|
||||
memory_id=str(memory_id),
|
||||
scope=scope,
|
||||
)
|
||||
return self.get(key)
|
||||
|
||||
def put(
|
||||
self,
|
||||
key: CacheKey,
|
||||
value: T,
|
||||
ttl_seconds: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Put a memory into cache.
|
||||
|
||||
Args:
|
||||
key: Cache key
|
||||
value: Value to cache
|
||||
ttl_seconds: Optional TTL override
|
||||
"""
|
||||
with self._lock:
|
||||
# Evict if at capacity
|
||||
self._evict_if_needed()
|
||||
|
||||
now = _utcnow()
|
||||
entry = CacheEntry(
|
||||
value=value,
|
||||
created_at=now,
|
||||
last_accessed_at=now,
|
||||
access_count=1,
|
||||
ttl_seconds=ttl_seconds or self._default_ttl,
|
||||
)
|
||||
|
||||
self._cache[key] = entry
|
||||
self._cache.move_to_end(key)
|
||||
self._stats.current_size = len(self._cache)
|
||||
|
||||
def put_by_id(
|
||||
self,
|
||||
memory_type: str,
|
||||
memory_id: UUID | str,
|
||||
value: T,
|
||||
scope: str | None = None,
|
||||
ttl_seconds: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Put a memory by type and ID.
|
||||
|
||||
Args:
|
||||
memory_type: Type of memory
|
||||
memory_id: Memory ID
|
||||
value: Value to cache
|
||||
scope: Optional scope
|
||||
ttl_seconds: Optional TTL override
|
||||
"""
|
||||
key = CacheKey(
|
||||
memory_type=memory_type,
|
||||
memory_id=str(memory_id),
|
||||
scope=scope,
|
||||
)
|
||||
self.put(key, value, ttl_seconds)
|
||||
|
||||
def _evict_if_needed(self) -> None:
|
||||
"""Evict entries if cache is at capacity."""
|
||||
while len(self._cache) >= self._max_size:
|
||||
# Remove least recently used (first item)
|
||||
if self._cache:
|
||||
self._cache.popitem(last=False)
|
||||
self._stats.evictions += 1
|
||||
|
||||
def invalidate(self, key: CacheKey) -> bool:
|
||||
"""
|
||||
Invalidate a specific cache entry.
|
||||
|
||||
Args:
|
||||
key: Cache key to invalidate
|
||||
|
||||
Returns:
|
||||
True if entry was found and removed
|
||||
"""
|
||||
with self._lock:
|
||||
if key in self._cache:
|
||||
del self._cache[key]
|
||||
self._stats.current_size = len(self._cache)
|
||||
return True
|
||||
return False
|
||||
|
||||
def invalidate_by_id(
|
||||
self,
|
||||
memory_type: str,
|
||||
memory_id: UUID | str,
|
||||
scope: str | None = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Invalidate a memory by type and ID.
|
||||
|
||||
Args:
|
||||
memory_type: Type of memory
|
||||
memory_id: Memory ID
|
||||
scope: Optional scope
|
||||
|
||||
Returns:
|
||||
True if entry was found and removed
|
||||
"""
|
||||
key = CacheKey(
|
||||
memory_type=memory_type,
|
||||
memory_id=str(memory_id),
|
||||
scope=scope,
|
||||
)
|
||||
return self.invalidate(key)
|
||||
|
||||
def invalidate_by_type(self, memory_type: str) -> int:
|
||||
"""
|
||||
Invalidate all entries of a memory type.
|
||||
|
||||
Args:
|
||||
memory_type: Type of memory to invalidate
|
||||
|
||||
Returns:
|
||||
Number of entries invalidated
|
||||
"""
|
||||
with self._lock:
|
||||
keys_to_remove = [
|
||||
k for k in self._cache.keys() if k.memory_type == memory_type
|
||||
]
|
||||
for key in keys_to_remove:
|
||||
del self._cache[key]
|
||||
|
||||
self._stats.current_size = len(self._cache)
|
||||
return len(keys_to_remove)
|
||||
|
||||
def invalidate_by_scope(self, scope: str) -> int:
|
||||
"""
|
||||
Invalidate all entries in a scope.
|
||||
|
||||
Args:
|
||||
scope: Scope to invalidate (e.g., project_id)
|
||||
|
||||
Returns:
|
||||
Number of entries invalidated
|
||||
"""
|
||||
with self._lock:
|
||||
keys_to_remove = [k for k in self._cache.keys() if k.scope == scope]
|
||||
for key in keys_to_remove:
|
||||
del self._cache[key]
|
||||
|
||||
self._stats.current_size = len(self._cache)
|
||||
return len(keys_to_remove)
|
||||
|
||||
def invalidate_pattern(self, pattern: str) -> int:
|
||||
"""
|
||||
Invalidate entries matching a pattern.
|
||||
|
||||
Pattern can include * as wildcard.
|
||||
|
||||
Args:
|
||||
pattern: Pattern to match (e.g., "episodic:*")
|
||||
|
||||
Returns:
|
||||
Number of entries invalidated
|
||||
"""
|
||||
import fnmatch
|
||||
|
||||
with self._lock:
|
||||
keys_to_remove = [
|
||||
k for k in self._cache.keys() if fnmatch.fnmatch(str(k), pattern)
|
||||
]
|
||||
for key in keys_to_remove:
|
||||
del self._cache[key]
|
||||
|
||||
self._stats.current_size = len(self._cache)
|
||||
return len(keys_to_remove)
|
||||
|
||||
def clear(self) -> int:
|
||||
"""
|
||||
Clear all cache entries.
|
||||
|
||||
Returns:
|
||||
Number of entries cleared
|
||||
"""
|
||||
with self._lock:
|
||||
count = len(self._cache)
|
||||
self._cache.clear()
|
||||
self._stats.current_size = 0
|
||||
logger.info(f"Cleared {count} entries from hot cache")
|
||||
return count
|
||||
|
||||
def cleanup_expired(self) -> int:
|
||||
"""
|
||||
Remove all expired entries.
|
||||
|
||||
Returns:
|
||||
Number of entries removed
|
||||
"""
|
||||
with self._lock:
|
||||
keys_to_remove = [
|
||||
k for k, v in self._cache.items() if v.is_expired()
|
||||
]
|
||||
for key in keys_to_remove:
|
||||
del self._cache[key]
|
||||
self._stats.expirations += 1
|
||||
|
||||
self._stats.current_size = len(self._cache)
|
||||
|
||||
if keys_to_remove:
|
||||
logger.debug(f"Cleaned up {len(keys_to_remove)} expired entries")
|
||||
|
||||
return len(keys_to_remove)
|
||||
|
||||
def get_hot_memories(self, limit: int = 10) -> list[tuple[CacheKey, int]]:
|
||||
"""
|
||||
Get the most frequently accessed memories.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of memories to return
|
||||
|
||||
Returns:
|
||||
List of (key, access_count) tuples sorted by access count
|
||||
"""
|
||||
with self._lock:
|
||||
entries = [
|
||||
(k, v.access_count)
|
||||
for k, v in self._cache.items()
|
||||
if not v.is_expired()
|
||||
]
|
||||
entries.sort(key=lambda x: x[1], reverse=True)
|
||||
return entries[:limit]
|
||||
|
||||
def get_stats(self) -> HotCacheStats:
|
||||
"""Get cache statistics."""
|
||||
with self._lock:
|
||||
self._stats.current_size = len(self._cache)
|
||||
return self._stats
|
||||
|
||||
def reset_stats(self) -> None:
|
||||
"""Reset cache statistics."""
|
||||
with self._lock:
|
||||
self._stats = HotCacheStats(
|
||||
max_size=self._max_size,
|
||||
current_size=len(self._cache),
|
||||
)
|
||||
|
||||
@property
|
||||
def size(self) -> int:
|
||||
"""Get current cache size."""
|
||||
return len(self._cache)
|
||||
|
||||
@property
|
||||
def max_size(self) -> int:
|
||||
"""Get maximum cache size."""
|
||||
return self._max_size
|
||||
|
||||
|
||||
# Factory function for typed caches
def create_hot_cache(
    max_size: int = 10000,
    default_ttl_seconds: float = 300.0,
) -> HotMemoryCache[Any]:
    """
    Build and return a hot memory cache with the given limits.

    Args:
        max_size: Maximum number of entries
        default_ttl_seconds: Default TTL for entries

    Returns:
        Configured HotMemoryCache instance
    """
    cache: HotMemoryCache[Any] = HotMemoryCache(
        max_size=max_size,
        default_ttl_seconds=default_ttl_seconds,
    )
    return cache
|
||||
Reference in New Issue
Block a user