Files
syndarix/backend/app/services/memory/cache/hot_cache.py
Felipe Cardoso 6954774e36 feat(memory): implement caching layer for memory operations (#98)
Add comprehensive caching layer for the Agent Memory System:

- HotMemoryCache: LRU cache for frequently accessed memories
  - Python 3.12 type parameter syntax
  - Thread-safe operations with RLock
  - TTL-based expiration
  - Access count tracking for hot memory identification
  - Scoped invalidation by type, scope, or pattern

- EmbeddingCache: Cache embeddings by content hash
  - Content-hash based deduplication
  - Optional Redis backing for persistence
  - LRU eviction with configurable max size
  - CachedEmbeddingGenerator wrapper for transparent caching

- CacheManager: Unified cache management
  - Coordinates hot cache, embedding cache, and retrieval cache
  - Centralized invalidation across all caches
  - Aggregated statistics and hit rate tracking
  - Automatic cleanup scheduling
  - Cache warmup support

Performance targets:
- Cache hit rate > 80% for hot memories
- Cache operations < 1ms (memory), < 5ms (Redis)

83 new tests with comprehensive coverage.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 04:04:13 +01:00

464 lines
12 KiB
Python

# app/services/memory/cache/hot_cache.py
"""
Hot Memory Cache.
LRU cache for frequently accessed memories.
Provides fast access to recently used memories without database queries.
"""
import logging
import threading
from collections import OrderedDict
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import Any
from uuid import UUID
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _utcnow() -> datetime:
"""Get current UTC time as timezone-aware datetime."""
return datetime.now(UTC)
@dataclass
class CacheEntry[T]:
"""A cached memory entry with metadata."""
value: T
created_at: datetime
last_accessed_at: datetime
access_count: int = 1
ttl_seconds: float = 300.0
def is_expired(self) -> bool:
"""Check if this entry has expired."""
age = (_utcnow() - self.created_at).total_seconds()
return age > self.ttl_seconds
def touch(self) -> None:
"""Update access time and count."""
self.last_accessed_at = _utcnow()
self.access_count += 1
@dataclass
class CacheKey:
"""A structured cache key with components."""
memory_type: str
memory_id: str
scope: str | None = None
def __hash__(self) -> int:
return hash((self.memory_type, self.memory_id, self.scope))
def __eq__(self, other: object) -> bool:
if not isinstance(other, CacheKey):
return False
return (
self.memory_type == other.memory_type
and self.memory_id == other.memory_id
and self.scope == other.scope
)
def __str__(self) -> str:
if self.scope:
return f"{self.memory_type}:{self.scope}:{self.memory_id}"
return f"{self.memory_type}:{self.memory_id}"
@dataclass
class HotCacheStats:
    """Counters describing hot-cache activity and occupancy."""

    hits: int = 0
    misses: int = 0
    evictions: int = 0
    expirations: int = 0
    current_size: int = 0
    max_size: int = 0

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups that were hits; 0.0 when nothing was looked up."""
        lookups = self.hits + self.misses
        return self.hits / lookups if lookups != 0 else 0.0

    def to_dict(self) -> dict[str, Any]:
        """Return the counters plus the derived hit rate as a plain dict."""
        counter_names = (
            "hits",
            "misses",
            "evictions",
            "expirations",
            "current_size",
            "max_size",
        )
        snapshot: dict[str, Any] = {
            name: getattr(self, name) for name in counter_names
        }
        snapshot["hit_rate"] = self.hit_rate
        return snapshot
class HotMemoryCache[T]:
"""
LRU cache for frequently accessed memories.
Features:
- LRU eviction when capacity is reached
- TTL-based expiration
- Access count tracking for hot memory identification
- Thread-safe operations
- Scoped invalidation
Performance targets:
- Cache hit rate > 80% for hot memories
- Get/put operations < 1ms
"""
def __init__(
self,
max_size: int = 10000,
default_ttl_seconds: float = 300.0,
) -> None:
"""
Initialize the hot memory cache.
Args:
max_size: Maximum number of entries
default_ttl_seconds: Default TTL for entries (5 minutes)
"""
self._max_size = max_size
self._default_ttl = default_ttl_seconds
self._cache: OrderedDict[CacheKey, CacheEntry[T]] = OrderedDict()
self._lock = threading.RLock()
self._stats = HotCacheStats(max_size=max_size)
logger.info(
f"Initialized HotMemoryCache with max_size={max_size}, "
f"ttl={default_ttl_seconds}s"
)
def get(self, key: CacheKey) -> T | None:
"""
Get a memory from cache.
Args:
key: Cache key
Returns:
Cached value or None if not found/expired
"""
with self._lock:
if key not in self._cache:
self._stats.misses += 1
return None
entry = self._cache[key]
# Check expiration
if entry.is_expired():
del self._cache[key]
self._stats.expirations += 1
self._stats.misses += 1
self._stats.current_size = len(self._cache)
return None
# Move to end (most recently used)
self._cache.move_to_end(key)
entry.touch()
self._stats.hits += 1
return entry.value
def get_by_id(
self,
memory_type: str,
memory_id: UUID | str,
scope: str | None = None,
) -> T | None:
"""
Get a memory by type and ID.
Args:
memory_type: Type of memory (episodic, semantic, procedural)
memory_id: Memory ID
scope: Optional scope (project_id, agent_id)
Returns:
Cached value or None if not found/expired
"""
key = CacheKey(
memory_type=memory_type,
memory_id=str(memory_id),
scope=scope,
)
return self.get(key)
def put(
self,
key: CacheKey,
value: T,
ttl_seconds: float | None = None,
) -> None:
"""
Put a memory into cache.
Args:
key: Cache key
value: Value to cache
ttl_seconds: Optional TTL override
"""
with self._lock:
# Evict if at capacity
self._evict_if_needed()
now = _utcnow()
entry = CacheEntry(
value=value,
created_at=now,
last_accessed_at=now,
access_count=1,
ttl_seconds=ttl_seconds or self._default_ttl,
)
self._cache[key] = entry
self._cache.move_to_end(key)
self._stats.current_size = len(self._cache)
def put_by_id(
self,
memory_type: str,
memory_id: UUID | str,
value: T,
scope: str | None = None,
ttl_seconds: float | None = None,
) -> None:
"""
Put a memory by type and ID.
Args:
memory_type: Type of memory
memory_id: Memory ID
value: Value to cache
scope: Optional scope
ttl_seconds: Optional TTL override
"""
key = CacheKey(
memory_type=memory_type,
memory_id=str(memory_id),
scope=scope,
)
self.put(key, value, ttl_seconds)
def _evict_if_needed(self) -> None:
"""Evict entries if cache is at capacity."""
while len(self._cache) >= self._max_size:
# Remove least recently used (first item)
if self._cache:
self._cache.popitem(last=False)
self._stats.evictions += 1
def invalidate(self, key: CacheKey) -> bool:
"""
Invalidate a specific cache entry.
Args:
key: Cache key to invalidate
Returns:
True if entry was found and removed
"""
with self._lock:
if key in self._cache:
del self._cache[key]
self._stats.current_size = len(self._cache)
return True
return False
def invalidate_by_id(
self,
memory_type: str,
memory_id: UUID | str,
scope: str | None = None,
) -> bool:
"""
Invalidate a memory by type and ID.
Args:
memory_type: Type of memory
memory_id: Memory ID
scope: Optional scope
Returns:
True if entry was found and removed
"""
key = CacheKey(
memory_type=memory_type,
memory_id=str(memory_id),
scope=scope,
)
return self.invalidate(key)
def invalidate_by_type(self, memory_type: str) -> int:
"""
Invalidate all entries of a memory type.
Args:
memory_type: Type of memory to invalidate
Returns:
Number of entries invalidated
"""
with self._lock:
keys_to_remove = [
k for k in self._cache.keys() if k.memory_type == memory_type
]
for key in keys_to_remove:
del self._cache[key]
self._stats.current_size = len(self._cache)
return len(keys_to_remove)
def invalidate_by_scope(self, scope: str) -> int:
"""
Invalidate all entries in a scope.
Args:
scope: Scope to invalidate (e.g., project_id)
Returns:
Number of entries invalidated
"""
with self._lock:
keys_to_remove = [k for k in self._cache.keys() if k.scope == scope]
for key in keys_to_remove:
del self._cache[key]
self._stats.current_size = len(self._cache)
return len(keys_to_remove)
def invalidate_pattern(self, pattern: str) -> int:
"""
Invalidate entries matching a pattern.
Pattern can include * as wildcard.
Args:
pattern: Pattern to match (e.g., "episodic:*")
Returns:
Number of entries invalidated
"""
import fnmatch
with self._lock:
keys_to_remove = [
k for k in self._cache.keys() if fnmatch.fnmatch(str(k), pattern)
]
for key in keys_to_remove:
del self._cache[key]
self._stats.current_size = len(self._cache)
return len(keys_to_remove)
def clear(self) -> int:
"""
Clear all cache entries.
Returns:
Number of entries cleared
"""
with self._lock:
count = len(self._cache)
self._cache.clear()
self._stats.current_size = 0
logger.info(f"Cleared {count} entries from hot cache")
return count
def cleanup_expired(self) -> int:
"""
Remove all expired entries.
Returns:
Number of entries removed
"""
with self._lock:
keys_to_remove = [
k for k, v in self._cache.items() if v.is_expired()
]
for key in keys_to_remove:
del self._cache[key]
self._stats.expirations += 1
self._stats.current_size = len(self._cache)
if keys_to_remove:
logger.debug(f"Cleaned up {len(keys_to_remove)} expired entries")
return len(keys_to_remove)
def get_hot_memories(self, limit: int = 10) -> list[tuple[CacheKey, int]]:
"""
Get the most frequently accessed memories.
Args:
limit: Maximum number of memories to return
Returns:
List of (key, access_count) tuples sorted by access count
"""
with self._lock:
entries = [
(k, v.access_count)
for k, v in self._cache.items()
if not v.is_expired()
]
entries.sort(key=lambda x: x[1], reverse=True)
return entries[:limit]
def get_stats(self) -> HotCacheStats:
"""Get cache statistics."""
with self._lock:
self._stats.current_size = len(self._cache)
return self._stats
def reset_stats(self) -> None:
"""Reset cache statistics."""
with self._lock:
self._stats = HotCacheStats(
max_size=self._max_size,
current_size=len(self._cache),
)
@property
def size(self) -> int:
"""Get current cache size."""
return len(self._cache)
@property
def max_size(self) -> int:
"""Get maximum cache size."""
return self._max_size
# Factory function for typed caches
def create_hot_cache(
    max_size: int = 10000,
    default_ttl_seconds: float = 300.0,
) -> HotMemoryCache[Any]:
    """
    Build a HotMemoryCache with the given capacity and default TTL.

    Args:
        max_size: Maximum number of entries
        default_ttl_seconds: Default TTL for entries

    Returns:
        Configured HotMemoryCache instance
    """
    cache: HotMemoryCache[Any] = HotMemoryCache(
        max_size=max_size,
        default_ttl_seconds=default_ttl_seconds,
    )
    return cache