feat(memory): implement caching layer for memory operations (#98)

Add comprehensive caching layer for the Agent Memory System:

- HotMemoryCache: LRU cache for frequently accessed memories
  - Python 3.12 type parameter syntax
  - Thread-safe operations with RLock
  - TTL-based expiration
  - Access count tracking for hot memory identification
  - Scoped invalidation by type, scope, or pattern

- EmbeddingCache: Cache embeddings by content hash
  - Content-hash based deduplication
  - Optional Redis backing for persistence
  - LRU eviction with configurable max size
  - CachedEmbeddingGenerator wrapper for transparent caching

- CacheManager: Unified cache management
  - Coordinates hot cache, embedding cache, and retrieval cache
  - Centralized invalidation across all caches
  - Aggregated statistics and hit rate tracking
  - Automatic cleanup scheduling
  - Cache warmup support

Performance targets:
- Cache hit rate > 80% for hot memories
- Cache operations < 1ms (memory), < 5ms (Redis)

83 new tests with comprehensive coverage.
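A minimal usage sketch of the new layer (illustrative only, not part of the diff; assumes the app package is importable and the memory settings default to `cache_enabled=True`):

```python
# Illustrative sketch, not part of this commit; assumes cache_enabled=True.
from uuid import uuid4

from app.services.memory.cache import get_cache_manager

manager = get_cache_manager()  # process-wide singleton; Redis is optional

# Hot-cache round trip
memory_id = uuid4()
manager.cache_memory("episodic", memory_id, {"summary": "deploy succeeded"}, scope="project-42")
assert manager.get_memory("episodic", memory_id, scope="project-42") is not None

# Most frequently accessed entries and the aggregate hit rate
for key, access_count in manager.get_hot_memories(limit=5):
    print(key, access_count)
print(manager.get_stats().overall_hit_rate)
```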

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit 6954774e36 (parent 30e5c68304), 2026-01-05 04:04:13 +01:00
8 changed files with 2690 additions and 0 deletions


@@ -0,0 +1,21 @@
# app/services/memory/cache/__init__.py
"""
Memory Caching Layer.
Provides caching for memory operations:
- Hot Memory Cache: LRU cache for frequently accessed memories
- Embedding Cache: Cache embeddings by content hash
- Cache Manager: Unified cache management with invalidation
"""
from .cache_manager import CacheManager, CacheStats, get_cache_manager
from .embedding_cache import EmbeddingCache
from .hot_cache import HotMemoryCache
__all__ = [
"CacheManager",
"CacheStats",
"EmbeddingCache",
"HotMemoryCache",
"get_cache_manager",
]


@@ -0,0 +1,500 @@
# app/services/memory/cache/cache_manager.py
"""
Cache Manager.
Unified cache management for memory operations.
Coordinates hot cache, embedding cache, and retrieval cache.
Provides centralized invalidation and statistics.
"""
import logging
import threading
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any
from uuid import UUID
from app.services.memory.config import get_memory_settings
from .embedding_cache import EmbeddingCache, create_embedding_cache
from .hot_cache import CacheKey, HotMemoryCache, create_hot_cache
if TYPE_CHECKING:
from redis.asyncio import Redis
from app.services.memory.indexing.retrieval import RetrievalCache
logger = logging.getLogger(__name__)
def _utcnow() -> datetime:
"""Get current UTC time as timezone-aware datetime."""
return datetime.now(UTC)
@dataclass
class CacheStats:
"""Aggregated cache statistics."""
hot_cache: dict[str, Any] = field(default_factory=dict)
embedding_cache: dict[str, Any] = field(default_factory=dict)
retrieval_cache: dict[str, Any] = field(default_factory=dict)
overall_hit_rate: float = 0.0
last_cleanup: datetime | None = None
cleanup_count: int = 0
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary."""
return {
"hot_cache": self.hot_cache,
"embedding_cache": self.embedding_cache,
"retrieval_cache": self.retrieval_cache,
"overall_hit_rate": self.overall_hit_rate,
"last_cleanup": self.last_cleanup.isoformat() if self.last_cleanup else None,
"cleanup_count": self.cleanup_count,
}
class CacheManager:
"""
Unified cache manager for memory operations.
Provides:
- Centralized cache configuration
- Coordinated invalidation across caches
- Aggregated statistics
- Automatic cleanup scheduling
Performance targets:
- Overall cache hit rate > 80%
- Cache operations < 1ms (memory), < 5ms (Redis)
"""
def __init__(
self,
hot_cache: HotMemoryCache[Any] | None = None,
embedding_cache: EmbeddingCache | None = None,
retrieval_cache: "RetrievalCache | None" = None,
redis: "Redis | None" = None,
) -> None:
"""
Initialize the cache manager.
Args:
hot_cache: Optional pre-configured hot cache
embedding_cache: Optional pre-configured embedding cache
retrieval_cache: Optional pre-configured retrieval cache
redis: Optional Redis connection for persistence
"""
self._settings = get_memory_settings()
self._redis = redis
self._enabled = self._settings.cache_enabled
# Initialize caches
if hot_cache:
self._hot_cache = hot_cache
else:
self._hot_cache = create_hot_cache(
max_size=self._settings.cache_max_items,
default_ttl_seconds=self._settings.cache_ttl_seconds,
)
if embedding_cache:
self._embedding_cache = embedding_cache
else:
self._embedding_cache = create_embedding_cache(
max_size=self._settings.cache_max_items,
default_ttl_seconds=self._settings.cache_ttl_seconds * 12, # 1hr for embeddings
redis=redis,
)
self._retrieval_cache = retrieval_cache
# Stats tracking
self._last_cleanup: datetime | None = None
self._cleanup_count = 0
self._lock = threading.RLock()
logger.info(
f"Initialized CacheManager: enabled={self._enabled}, "
f"redis={'connected' if redis else 'disabled'}"
)
def set_redis(self, redis: "Redis") -> None:
"""Set Redis connection for all caches."""
self._redis = redis
self._embedding_cache.set_redis(redis)
def set_retrieval_cache(self, cache: "RetrievalCache") -> None:
"""Set retrieval cache instance."""
self._retrieval_cache = cache
@property
def is_enabled(self) -> bool:
"""Check if caching is enabled."""
return self._enabled
@property
def hot_cache(self) -> HotMemoryCache[Any]:
"""Get the hot memory cache."""
return self._hot_cache
@property
def embedding_cache(self) -> EmbeddingCache:
"""Get the embedding cache."""
return self._embedding_cache
@property
def retrieval_cache(self) -> "RetrievalCache | None":
"""Get the retrieval cache."""
return self._retrieval_cache
# =========================================================================
# Hot Memory Cache Operations
# =========================================================================
def get_memory(
self,
memory_type: str,
memory_id: UUID | str,
scope: str | None = None,
) -> Any | None:
"""
Get a memory from hot cache.
Args:
memory_type: Type of memory
memory_id: Memory ID
scope: Optional scope
Returns:
Cached memory or None
"""
if not self._enabled:
return None
return self._hot_cache.get_by_id(memory_type, memory_id, scope)
def cache_memory(
self,
memory_type: str,
memory_id: UUID | str,
memory: Any,
scope: str | None = None,
ttl_seconds: float | None = None,
) -> None:
"""
Cache a memory in hot cache.
Args:
memory_type: Type of memory
memory_id: Memory ID
memory: Memory object
scope: Optional scope
ttl_seconds: Optional TTL override
"""
if not self._enabled:
return
self._hot_cache.put_by_id(memory_type, memory_id, memory, scope, ttl_seconds)
# =========================================================================
# Embedding Cache Operations
# =========================================================================
async def get_embedding(
self,
content: str,
model: str = "default",
) -> list[float] | None:
"""
Get a cached embedding.
Args:
content: Content text
model: Model name
Returns:
Cached embedding or None
"""
if not self._enabled:
return None
return await self._embedding_cache.get(content, model)
async def cache_embedding(
self,
content: str,
embedding: list[float],
model: str = "default",
ttl_seconds: float | None = None,
) -> str:
"""
Cache an embedding.
Args:
content: Content text
embedding: Embedding vector
model: Model name
ttl_seconds: Optional TTL override
Returns:
Content hash
"""
if not self._enabled:
return EmbeddingCache.hash_content(content)
return await self._embedding_cache.put(content, embedding, model, ttl_seconds)
# =========================================================================
# Invalidation
# =========================================================================
async def invalidate_memory(
self,
memory_type: str,
memory_id: UUID | str,
scope: str | None = None,
) -> int:
"""
Invalidate a memory across all caches.
Args:
memory_type: Type of memory
memory_id: Memory ID
scope: Optional scope
Returns:
Number of entries invalidated
"""
count = 0
# Invalidate hot cache
if self._hot_cache.invalidate_by_id(memory_type, memory_id, scope):
count += 1
# Invalidate retrieval cache
if self._retrieval_cache:
uuid_id = UUID(str(memory_id)) if not isinstance(memory_id, UUID) else memory_id
count += self._retrieval_cache.invalidate_by_memory(uuid_id)
logger.debug(f"Invalidated {count} cache entries for {memory_type}:{memory_id}")
return count
async def invalidate_by_type(self, memory_type: str) -> int:
"""
Invalidate all entries of a memory type.
Args:
memory_type: Type of memory
Returns:
Number of entries invalidated
"""
count = self._hot_cache.invalidate_by_type(memory_type)
if self._retrieval_cache:
count += self._retrieval_cache.clear()
logger.info(f"Invalidated {count} cache entries for type {memory_type}")
return count
async def invalidate_by_scope(self, scope: str) -> int:
"""
Invalidate all entries in a scope.
Args:
scope: Scope to invalidate (e.g., project_id)
Returns:
Number of entries invalidated
"""
count = self._hot_cache.invalidate_by_scope(scope)
# Retrieval cache doesn't support scope-based invalidation
# so we clear it entirely for safety
if self._retrieval_cache:
count += self._retrieval_cache.clear()
logger.info(f"Invalidated {count} cache entries for scope {scope}")
return count
async def invalidate_embedding(
self,
content: str,
model: str = "default",
) -> bool:
"""
Invalidate a cached embedding.
Args:
content: Content text
model: Model name
Returns:
True if entry was found and removed
"""
return await self._embedding_cache.invalidate(content, model)
async def clear_all(self) -> int:
"""
Clear all caches.
Returns:
Total number of entries cleared
"""
count = 0
count += self._hot_cache.clear()
count += await self._embedding_cache.clear()
if self._retrieval_cache:
count += self._retrieval_cache.clear()
logger.info(f"Cleared {count} entries from all caches")
return count
# =========================================================================
# Cleanup
# =========================================================================
async def cleanup_expired(self) -> int:
"""
Clean up expired entries from all caches.
Returns:
Number of entries cleaned up
"""
with self._lock:
count = 0
count += self._hot_cache.cleanup_expired()
count += self._embedding_cache.cleanup_expired()
# Retrieval cache doesn't have a cleanup method,
# but entries expire on access
self._last_cleanup = _utcnow()
self._cleanup_count += 1
if count > 0:
logger.info(f"Cleaned up {count} expired cache entries")
return count
# =========================================================================
# Statistics
# =========================================================================
def get_stats(self) -> CacheStats:
"""
Get aggregated cache statistics.
Returns:
CacheStats with all cache metrics
"""
hot_stats = self._hot_cache.get_stats().to_dict()
emb_stats = self._embedding_cache.get_stats().to_dict()
retrieval_stats: dict[str, Any] = {}
if self._retrieval_cache:
retrieval_stats = self._retrieval_cache.get_stats()
# Calculate overall hit rate
total_hits = hot_stats.get("hits", 0) + emb_stats.get("hits", 0)
total_misses = hot_stats.get("misses", 0) + emb_stats.get("misses", 0)
        # The retrieval cache doesn't track hits/misses the same way,
        # so it is excluded from the overall hit rate.
total_requests = total_hits + total_misses
overall_hit_rate = total_hits / total_requests if total_requests > 0 else 0.0
return CacheStats(
hot_cache=hot_stats,
embedding_cache=emb_stats,
retrieval_cache=retrieval_stats,
overall_hit_rate=overall_hit_rate,
last_cleanup=self._last_cleanup,
cleanup_count=self._cleanup_count,
)
def get_hot_memories(self, limit: int = 10) -> list[tuple[CacheKey, int]]:
"""
Get the most frequently accessed memories.
Args:
limit: Maximum number to return
Returns:
List of (key, access_count) tuples
"""
return self._hot_cache.get_hot_memories(limit)
def reset_stats(self) -> None:
"""Reset all cache statistics."""
self._hot_cache.reset_stats()
self._embedding_cache.reset_stats()
# =========================================================================
# Warmup
# =========================================================================
async def warmup(
self,
memories: list[tuple[str, UUID | str, Any]],
scope: str | None = None,
) -> int:
"""
Warm up the hot cache with memories.
Args:
memories: List of (memory_type, memory_id, memory) tuples
scope: Optional scope for all memories
Returns:
Number of memories cached
"""
if not self._enabled:
return 0
for memory_type, memory_id, memory in memories:
self._hot_cache.put_by_id(memory_type, memory_id, memory, scope)
logger.info(f"Warmed up cache with {len(memories)} memories")
return len(memories)
# Singleton instance
_cache_manager: CacheManager | None = None
_cache_manager_lock = threading.Lock()
def get_cache_manager(
redis: "Redis | None" = None,
reset: bool = False,
) -> CacheManager:
"""
Get the global CacheManager instance.
Thread-safe with double-checked locking pattern.
Args:
redis: Optional Redis connection
reset: Force create a new instance
Returns:
CacheManager instance
"""
global _cache_manager
if reset or _cache_manager is None:
with _cache_manager_lock:
if reset or _cache_manager is None:
_cache_manager = CacheManager(redis=redis)
return _cache_manager
def reset_cache_manager() -> None:
"""Reset the global cache manager instance."""
global _cache_manager
with _cache_manager_lock:
_cache_manager = None
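
A short async sketch of the manager's embedding and invalidation paths (illustrative, not part of the diff; the embedding vector and IDs are made up, and caching is assumed to be enabled):

```python
# Illustrative sketch, not part of this commit.
import asyncio
from uuid import uuid4

from app.services.memory.cache import get_cache_manager


async def main() -> None:
    manager = get_cache_manager()

    # Embedding cache: keyed by content hash, so identical text is deduplicated
    content = "retry the webhook with exponential backoff"
    content_hash = await manager.cache_embedding(content, [0.1, 0.2, 0.3], model="default")
    assert await manager.get_embedding(content, model="default") == [0.1, 0.2, 0.3]
    print("content hash:", content_hash)

    # Invalidation and periodic cleanup
    await manager.invalidate_memory("semantic", uuid4(), scope="project-42")
    removed = await manager.cleanup_expired()
    print("expired entries removed:", removed)


asyncio.run(main())
```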


@@ -0,0 +1,627 @@
# app/services/memory/cache/embedding_cache.py
"""
Embedding Cache.
Caches embeddings by content hash to avoid recomputing.
Provides significant performance improvement for repeated content.
"""
import hashlib
import logging
import threading
from collections import OrderedDict
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from redis.asyncio import Redis
logger = logging.getLogger(__name__)
def _utcnow() -> datetime:
"""Get current UTC time as timezone-aware datetime."""
return datetime.now(UTC)
@dataclass
class EmbeddingEntry:
"""A cached embedding entry."""
embedding: list[float]
content_hash: str
model: str
created_at: datetime
ttl_seconds: float = 3600.0 # 1 hour default
def is_expired(self) -> bool:
"""Check if this entry has expired."""
age = (_utcnow() - self.created_at).total_seconds()
return age > self.ttl_seconds
@dataclass
class EmbeddingCacheStats:
"""Statistics for the embedding cache."""
hits: int = 0
misses: int = 0
evictions: int = 0
expirations: int = 0
current_size: int = 0
max_size: int = 0
bytes_saved: int = 0 # Estimated bytes saved by caching
@property
def hit_rate(self) -> float:
"""Calculate cache hit rate."""
total = self.hits + self.misses
if total == 0:
return 0.0
return self.hits / total
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary."""
return {
"hits": self.hits,
"misses": self.misses,
"evictions": self.evictions,
"expirations": self.expirations,
"current_size": self.current_size,
"max_size": self.max_size,
"hit_rate": self.hit_rate,
"bytes_saved": self.bytes_saved,
}
class EmbeddingCache:
"""
Cache for embeddings by content hash.
Features:
- Content-hash based deduplication
- LRU eviction
- TTL-based expiration
- Optional Redis backing for persistence
- Thread-safe operations
Performance targets:
- Cache hit rate > 90% for repeated content
- Get/put operations < 1ms (memory), < 5ms (Redis)
"""
def __init__(
self,
max_size: int = 50000,
default_ttl_seconds: float = 3600.0,
redis: "Redis | None" = None,
redis_prefix: str = "mem:emb",
) -> None:
"""
Initialize the embedding cache.
Args:
max_size: Maximum number of entries in memory cache
default_ttl_seconds: Default TTL for entries (1 hour)
redis: Optional Redis connection for persistence
redis_prefix: Prefix for Redis keys
"""
self._max_size = max_size
self._default_ttl = default_ttl_seconds
self._cache: OrderedDict[str, EmbeddingEntry] = OrderedDict()
self._lock = threading.RLock()
self._stats = EmbeddingCacheStats(max_size=max_size)
self._redis = redis
self._redis_prefix = redis_prefix
logger.info(
f"Initialized EmbeddingCache with max_size={max_size}, "
f"ttl={default_ttl_seconds}s, redis={'enabled' if redis else 'disabled'}"
)
def set_redis(self, redis: "Redis") -> None:
"""Set Redis connection for persistence."""
self._redis = redis
@staticmethod
def hash_content(content: str) -> str:
"""
Compute hash of content for cache key.
Args:
content: Content to hash
Returns:
32-character hex hash
"""
return hashlib.sha256(content.encode()).hexdigest()[:32]
def _cache_key(self, content_hash: str, model: str) -> str:
"""Build cache key from content hash and model."""
return f"{content_hash}:{model}"
def _redis_key(self, content_hash: str, model: str) -> str:
"""Build Redis key from content hash and model."""
return f"{self._redis_prefix}:{content_hash}:{model}"
async def get(
self,
content: str,
model: str = "default",
) -> list[float] | None:
"""
Get a cached embedding.
Args:
content: Content text
model: Model name
Returns:
Cached embedding or None if not found/expired
"""
content_hash = self.hash_content(content)
cache_key = self._cache_key(content_hash, model)
# Check memory cache first
with self._lock:
if cache_key in self._cache:
entry = self._cache[cache_key]
if entry.is_expired():
del self._cache[cache_key]
self._stats.expirations += 1
self._stats.current_size = len(self._cache)
else:
# Move to end (most recently used)
self._cache.move_to_end(cache_key)
self._stats.hits += 1
return entry.embedding
# Check Redis if available
if self._redis:
try:
redis_key = self._redis_key(content_hash, model)
data = await self._redis.get(redis_key)
if data:
import json
embedding = json.loads(data)
# Store in memory cache for faster access
self._put_memory(content_hash, model, embedding)
self._stats.hits += 1
return embedding
except Exception as e:
logger.warning(f"Redis get error: {e}")
self._stats.misses += 1
return None
async def get_by_hash(
self,
content_hash: str,
model: str = "default",
) -> list[float] | None:
"""
Get a cached embedding by hash.
Args:
content_hash: Content hash
model: Model name
Returns:
Cached embedding or None if not found/expired
"""
cache_key = self._cache_key(content_hash, model)
with self._lock:
if cache_key in self._cache:
entry = self._cache[cache_key]
if entry.is_expired():
del self._cache[cache_key]
self._stats.expirations += 1
self._stats.current_size = len(self._cache)
else:
self._cache.move_to_end(cache_key)
self._stats.hits += 1
return entry.embedding
# Check Redis
if self._redis:
try:
redis_key = self._redis_key(content_hash, model)
data = await self._redis.get(redis_key)
if data:
import json
embedding = json.loads(data)
self._put_memory(content_hash, model, embedding)
self._stats.hits += 1
return embedding
except Exception as e:
logger.warning(f"Redis get error: {e}")
self._stats.misses += 1
return None
async def put(
self,
content: str,
embedding: list[float],
model: str = "default",
ttl_seconds: float | None = None,
) -> str:
"""
Cache an embedding.
Args:
content: Content text
embedding: Embedding vector
model: Model name
ttl_seconds: Optional TTL override
Returns:
Content hash
"""
content_hash = self.hash_content(content)
ttl = ttl_seconds or self._default_ttl
# Store in memory
self._put_memory(content_hash, model, embedding, ttl)
# Store in Redis if available
if self._redis:
try:
import json
redis_key = self._redis_key(content_hash, model)
await self._redis.setex(
redis_key,
int(ttl),
json.dumps(embedding),
)
except Exception as e:
logger.warning(f"Redis put error: {e}")
return content_hash
def _put_memory(
self,
content_hash: str,
model: str,
embedding: list[float],
ttl_seconds: float | None = None,
) -> None:
"""Store in memory cache."""
with self._lock:
# Evict if at capacity
self._evict_if_needed()
cache_key = self._cache_key(content_hash, model)
entry = EmbeddingEntry(
embedding=embedding,
content_hash=content_hash,
model=model,
created_at=_utcnow(),
ttl_seconds=ttl_seconds or self._default_ttl,
)
self._cache[cache_key] = entry
self._cache.move_to_end(cache_key)
self._stats.current_size = len(self._cache)
    def _evict_if_needed(self) -> None:
        """Evict entries if cache is at capacity."""
        # Drop least recently used entries; the emptiness check guards against
        # spinning forever when max_size == 0.
        while self._cache and len(self._cache) >= self._max_size:
            self._cache.popitem(last=False)
            self._stats.evictions += 1
async def put_batch(
self,
items: list[tuple[str, list[float]]],
model: str = "default",
ttl_seconds: float | None = None,
) -> list[str]:
"""
Cache multiple embeddings.
Args:
items: List of (content, embedding) tuples
model: Model name
ttl_seconds: Optional TTL override
Returns:
List of content hashes
"""
hashes = []
for content, embedding in items:
content_hash = await self.put(content, embedding, model, ttl_seconds)
hashes.append(content_hash)
return hashes
async def invalidate(
self,
content: str,
model: str = "default",
) -> bool:
"""
Invalidate a cached embedding.
Args:
content: Content text
model: Model name
Returns:
True if entry was found and removed
"""
content_hash = self.hash_content(content)
return await self.invalidate_by_hash(content_hash, model)
async def invalidate_by_hash(
self,
content_hash: str,
model: str = "default",
) -> bool:
"""
Invalidate a cached embedding by hash.
Args:
content_hash: Content hash
model: Model name
Returns:
True if entry was found and removed
"""
cache_key = self._cache_key(content_hash, model)
removed = False
with self._lock:
if cache_key in self._cache:
del self._cache[cache_key]
self._stats.current_size = len(self._cache)
removed = True
# Remove from Redis
if self._redis:
try:
redis_key = self._redis_key(content_hash, model)
await self._redis.delete(redis_key)
removed = True
except Exception as e:
logger.warning(f"Redis delete error: {e}")
return removed
async def invalidate_by_model(self, model: str) -> int:
"""
Invalidate all embeddings for a model.
Args:
model: Model name
Returns:
Number of entries invalidated
"""
count = 0
with self._lock:
keys_to_remove = [
k for k, v in self._cache.items() if v.model == model
]
for key in keys_to_remove:
del self._cache[key]
count += 1
self._stats.current_size = len(self._cache)
# Note: Redis pattern deletion would require SCAN which is expensive
# For now, we only clear memory cache for model-based invalidation
return count
async def clear(self) -> int:
"""
Clear all cache entries.
Returns:
Number of entries cleared
"""
with self._lock:
count = len(self._cache)
self._cache.clear()
self._stats.current_size = 0
# Clear Redis entries
if self._redis:
try:
pattern = f"{self._redis_prefix}:*"
deleted = 0
async for key in self._redis.scan_iter(match=pattern):
await self._redis.delete(key)
deleted += 1
count = max(count, deleted)
except Exception as e:
logger.warning(f"Redis clear error: {e}")
logger.info(f"Cleared {count} entries from embedding cache")
return count
def cleanup_expired(self) -> int:
"""
Remove all expired entries from memory cache.
Returns:
Number of entries removed
"""
with self._lock:
keys_to_remove = [
k for k, v in self._cache.items() if v.is_expired()
]
for key in keys_to_remove:
del self._cache[key]
self._stats.expirations += 1
self._stats.current_size = len(self._cache)
if keys_to_remove:
logger.debug(f"Cleaned up {len(keys_to_remove)} expired embeddings")
return len(keys_to_remove)
def get_stats(self) -> EmbeddingCacheStats:
"""Get cache statistics."""
with self._lock:
self._stats.current_size = len(self._cache)
return self._stats
def reset_stats(self) -> None:
"""Reset cache statistics."""
with self._lock:
self._stats = EmbeddingCacheStats(
max_size=self._max_size,
current_size=len(self._cache),
)
@property
def size(self) -> int:
"""Get current cache size."""
return len(self._cache)
@property
def max_size(self) -> int:
"""Get maximum cache size."""
return self._max_size
class CachedEmbeddingGenerator:
"""
Wrapper for embedding generators with caching.
Wraps an embedding generator to cache results.
"""
def __init__(
self,
generator: Any,
cache: EmbeddingCache,
model: str = "default",
) -> None:
"""
Initialize the cached embedding generator.
Args:
generator: Underlying embedding generator
cache: Embedding cache
model: Model name for cache keys
"""
self._generator = generator
self._cache = cache
self._model = model
self._call_count = 0
self._cache_hit_count = 0
async def generate(self, text: str) -> list[float]:
"""
Generate embedding with caching.
Args:
text: Text to embed
Returns:
Embedding vector
"""
self._call_count += 1
# Check cache first
cached = await self._cache.get(text, self._model)
if cached is not None:
self._cache_hit_count += 1
return cached
# Generate and cache
embedding = await self._generator.generate(text)
await self._cache.put(text, embedding, self._model)
return embedding
async def generate_batch(
self,
texts: list[str],
) -> list[list[float]]:
"""
Generate embeddings for multiple texts with caching.
Args:
texts: Texts to embed
Returns:
List of embedding vectors
"""
results: list[list[float] | None] = [None] * len(texts)
to_generate: list[tuple[int, str]] = []
# Check cache for each text
for i, text in enumerate(texts):
cached = await self._cache.get(text, self._model)
if cached is not None:
results[i] = cached
self._cache_hit_count += 1
else:
to_generate.append((i, text))
self._call_count += len(texts)
# Generate missing embeddings
if to_generate:
if hasattr(self._generator, "generate_batch"):
texts_to_gen = [t for _, t in to_generate]
embeddings = await self._generator.generate_batch(texts_to_gen)
for (idx, text), embedding in zip(to_generate, embeddings, strict=True):
results[idx] = embedding
await self._cache.put(text, embedding, self._model)
else:
# Fallback to individual generation
for idx, text in to_generate:
embedding = await self._generator.generate(text)
results[idx] = embedding
await self._cache.put(text, embedding, self._model)
return results # type: ignore[return-value]
def get_stats(self) -> dict[str, Any]:
"""Get generator statistics."""
return {
"call_count": self._call_count,
"cache_hit_count": self._cache_hit_count,
"cache_hit_rate": (
self._cache_hit_count / self._call_count
if self._call_count > 0
else 0.0
),
"cache_stats": self._cache.get_stats().to_dict(),
}
# Factory function
def create_embedding_cache(
max_size: int = 50000,
default_ttl_seconds: float = 3600.0,
redis: "Redis | None" = None,
) -> EmbeddingCache:
"""
Create an embedding cache.
Args:
max_size: Maximum number of entries
default_ttl_seconds: Default TTL for entries
redis: Optional Redis connection
Returns:
Configured EmbeddingCache instance
"""
return EmbeddingCache(
max_size=max_size,
default_ttl_seconds=default_ttl_seconds,
redis=redis,
)
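
A sketch of the transparent-caching wrapper around a toy generator (illustrative, not part of the diff; `FakeGenerator` is a stand-in for a real embedding backend):

```python
# Illustrative sketch, not part of this commit.
import asyncio

from app.services.memory.cache.embedding_cache import (
    CachedEmbeddingGenerator,
    create_embedding_cache,
)


class FakeGenerator:
    """Stand-in for a real embedding backend; hashes text into a tiny vector."""

    async def generate(self, text: str) -> list[float]:
        return [float(len(text)), float(sum(map(ord, text)) % 997)]


async def main() -> None:
    cache = create_embedding_cache(max_size=1000, default_ttl_seconds=60.0)
    generator = CachedEmbeddingGenerator(FakeGenerator(), cache, model="fake")

    first = await generator.generate("hello world")
    second = await generator.generate("hello world")  # served from cache
    assert first == second
    print(generator.get_stats()["cache_hit_rate"])  # 0.5 after one hit in two calls


asyncio.run(main())
```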


@@ -0,0 +1,463 @@
# app/services/memory/cache/hot_cache.py
"""
Hot Memory Cache.
LRU cache for frequently accessed memories.
Provides fast access to recently used memories without database queries.
"""
import logging
import threading
from collections import OrderedDict
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import Any
from uuid import UUID
logger = logging.getLogger(__name__)
def _utcnow() -> datetime:
"""Get current UTC time as timezone-aware datetime."""
return datetime.now(UTC)
@dataclass
class CacheEntry[T]:
"""A cached memory entry with metadata."""
value: T
created_at: datetime
last_accessed_at: datetime
access_count: int = 1
ttl_seconds: float = 300.0
def is_expired(self) -> bool:
"""Check if this entry has expired."""
age = (_utcnow() - self.created_at).total_seconds()
return age > self.ttl_seconds
def touch(self) -> None:
"""Update access time and count."""
self.last_accessed_at = _utcnow()
self.access_count += 1
@dataclass
class CacheKey:
"""A structured cache key with components."""
memory_type: str
memory_id: str
scope: str | None = None
def __hash__(self) -> int:
return hash((self.memory_type, self.memory_id, self.scope))
def __eq__(self, other: object) -> bool:
if not isinstance(other, CacheKey):
return False
return (
self.memory_type == other.memory_type
and self.memory_id == other.memory_id
and self.scope == other.scope
)
def __str__(self) -> str:
if self.scope:
return f"{self.memory_type}:{self.scope}:{self.memory_id}"
return f"{self.memory_type}:{self.memory_id}"
@dataclass
class HotCacheStats:
"""Statistics for the hot memory cache."""
hits: int = 0
misses: int = 0
evictions: int = 0
expirations: int = 0
current_size: int = 0
max_size: int = 0
@property
def hit_rate(self) -> float:
"""Calculate cache hit rate."""
total = self.hits + self.misses
if total == 0:
return 0.0
return self.hits / total
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary."""
return {
"hits": self.hits,
"misses": self.misses,
"evictions": self.evictions,
"expirations": self.expirations,
"current_size": self.current_size,
"max_size": self.max_size,
"hit_rate": self.hit_rate,
}
class HotMemoryCache[T]:
"""
LRU cache for frequently accessed memories.
Features:
- LRU eviction when capacity is reached
- TTL-based expiration
- Access count tracking for hot memory identification
- Thread-safe operations
- Scoped invalidation
Performance targets:
- Cache hit rate > 80% for hot memories
- Get/put operations < 1ms
"""
def __init__(
self,
max_size: int = 10000,
default_ttl_seconds: float = 300.0,
) -> None:
"""
Initialize the hot memory cache.
Args:
max_size: Maximum number of entries
default_ttl_seconds: Default TTL for entries (5 minutes)
"""
self._max_size = max_size
self._default_ttl = default_ttl_seconds
self._cache: OrderedDict[CacheKey, CacheEntry[T]] = OrderedDict()
self._lock = threading.RLock()
self._stats = HotCacheStats(max_size=max_size)
logger.info(
f"Initialized HotMemoryCache with max_size={max_size}, "
f"ttl={default_ttl_seconds}s"
)
def get(self, key: CacheKey) -> T | None:
"""
Get a memory from cache.
Args:
key: Cache key
Returns:
Cached value or None if not found/expired
"""
with self._lock:
if key not in self._cache:
self._stats.misses += 1
return None
entry = self._cache[key]
# Check expiration
if entry.is_expired():
del self._cache[key]
self._stats.expirations += 1
self._stats.misses += 1
self._stats.current_size = len(self._cache)
return None
# Move to end (most recently used)
self._cache.move_to_end(key)
entry.touch()
self._stats.hits += 1
return entry.value
def get_by_id(
self,
memory_type: str,
memory_id: UUID | str,
scope: str | None = None,
) -> T | None:
"""
Get a memory by type and ID.
Args:
memory_type: Type of memory (episodic, semantic, procedural)
memory_id: Memory ID
scope: Optional scope (project_id, agent_id)
Returns:
Cached value or None if not found/expired
"""
key = CacheKey(
memory_type=memory_type,
memory_id=str(memory_id),
scope=scope,
)
return self.get(key)
def put(
self,
key: CacheKey,
value: T,
ttl_seconds: float | None = None,
) -> None:
"""
Put a memory into cache.
Args:
key: Cache key
value: Value to cache
ttl_seconds: Optional TTL override
"""
with self._lock:
# Evict if at capacity
self._evict_if_needed()
now = _utcnow()
entry = CacheEntry(
value=value,
created_at=now,
last_accessed_at=now,
access_count=1,
ttl_seconds=ttl_seconds or self._default_ttl,
)
self._cache[key] = entry
self._cache.move_to_end(key)
self._stats.current_size = len(self._cache)
def put_by_id(
self,
memory_type: str,
memory_id: UUID | str,
value: T,
scope: str | None = None,
ttl_seconds: float | None = None,
) -> None:
"""
Put a memory by type and ID.
Args:
memory_type: Type of memory
memory_id: Memory ID
value: Value to cache
scope: Optional scope
ttl_seconds: Optional TTL override
"""
key = CacheKey(
memory_type=memory_type,
memory_id=str(memory_id),
scope=scope,
)
self.put(key, value, ttl_seconds)
    def _evict_if_needed(self) -> None:
        """Evict entries if cache is at capacity."""
        # Remove least recently used entries (front of the OrderedDict); the
        # emptiness check guards against spinning forever when max_size == 0.
        while self._cache and len(self._cache) >= self._max_size:
            self._cache.popitem(last=False)
            self._stats.evictions += 1
def invalidate(self, key: CacheKey) -> bool:
"""
Invalidate a specific cache entry.
Args:
key: Cache key to invalidate
Returns:
True if entry was found and removed
"""
with self._lock:
if key in self._cache:
del self._cache[key]
self._stats.current_size = len(self._cache)
return True
return False
def invalidate_by_id(
self,
memory_type: str,
memory_id: UUID | str,
scope: str | None = None,
) -> bool:
"""
Invalidate a memory by type and ID.
Args:
memory_type: Type of memory
memory_id: Memory ID
scope: Optional scope
Returns:
True if entry was found and removed
"""
key = CacheKey(
memory_type=memory_type,
memory_id=str(memory_id),
scope=scope,
)
return self.invalidate(key)
def invalidate_by_type(self, memory_type: str) -> int:
"""
Invalidate all entries of a memory type.
Args:
memory_type: Type of memory to invalidate
Returns:
Number of entries invalidated
"""
with self._lock:
keys_to_remove = [
k for k in self._cache.keys() if k.memory_type == memory_type
]
for key in keys_to_remove:
del self._cache[key]
self._stats.current_size = len(self._cache)
return len(keys_to_remove)
def invalidate_by_scope(self, scope: str) -> int:
"""
Invalidate all entries in a scope.
Args:
scope: Scope to invalidate (e.g., project_id)
Returns:
Number of entries invalidated
"""
with self._lock:
keys_to_remove = [k for k in self._cache.keys() if k.scope == scope]
for key in keys_to_remove:
del self._cache[key]
self._stats.current_size = len(self._cache)
return len(keys_to_remove)
def invalidate_pattern(self, pattern: str) -> int:
"""
Invalidate entries matching a pattern.
Pattern can include * as wildcard.
Args:
pattern: Pattern to match (e.g., "episodic:*")
Returns:
Number of entries invalidated
"""
import fnmatch
with self._lock:
keys_to_remove = [
k for k in self._cache.keys() if fnmatch.fnmatch(str(k), pattern)
]
for key in keys_to_remove:
del self._cache[key]
self._stats.current_size = len(self._cache)
return len(keys_to_remove)
def clear(self) -> int:
"""
Clear all cache entries.
Returns:
Number of entries cleared
"""
with self._lock:
count = len(self._cache)
self._cache.clear()
self._stats.current_size = 0
logger.info(f"Cleared {count} entries from hot cache")
return count
def cleanup_expired(self) -> int:
"""
Remove all expired entries.
Returns:
Number of entries removed
"""
with self._lock:
keys_to_remove = [
k for k, v in self._cache.items() if v.is_expired()
]
for key in keys_to_remove:
del self._cache[key]
self._stats.expirations += 1
self._stats.current_size = len(self._cache)
if keys_to_remove:
logger.debug(f"Cleaned up {len(keys_to_remove)} expired entries")
return len(keys_to_remove)
def get_hot_memories(self, limit: int = 10) -> list[tuple[CacheKey, int]]:
"""
Get the most frequently accessed memories.
Args:
limit: Maximum number of memories to return
Returns:
List of (key, access_count) tuples sorted by access count
"""
with self._lock:
entries = [
(k, v.access_count)
for k, v in self._cache.items()
if not v.is_expired()
]
entries.sort(key=lambda x: x[1], reverse=True)
return entries[:limit]
def get_stats(self) -> HotCacheStats:
"""Get cache statistics."""
with self._lock:
self._stats.current_size = len(self._cache)
return self._stats
def reset_stats(self) -> None:
"""Reset cache statistics."""
with self._lock:
self._stats = HotCacheStats(
max_size=self._max_size,
current_size=len(self._cache),
)
@property
def size(self) -> int:
"""Get current cache size."""
return len(self._cache)
@property
def max_size(self) -> int:
"""Get maximum cache size."""
return self._max_size
# Factory function for typed caches
def create_hot_cache(
max_size: int = 10000,
default_ttl_seconds: float = 300.0,
) -> HotMemoryCache[Any]:
"""
Create a hot memory cache.
Args:
max_size: Maximum number of entries
default_ttl_seconds: Default TTL for entries
Returns:
Configured HotMemoryCache instance
"""
return HotMemoryCache(
max_size=max_size,
default_ttl_seconds=default_ttl_seconds,
)
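
A direct usage sketch for the hot cache (illustrative, not part of the diff; the payloads are plain dicts here, whereas real callers would cache their memory models):

```python
# Illustrative sketch, not part of this commit.
from uuid import uuid4

from app.services.memory.cache.hot_cache import create_hot_cache

cache = create_hot_cache(max_size=100, default_ttl_seconds=30.0)

ids = [uuid4() for _ in range(3)]
for memory_id in ids:
    cache.put_by_id("semantic", memory_id, {"fact": str(memory_id)}, scope="agent-7")

# Repeated reads raise the access count, which drives get_hot_memories()
for _ in range(5):
    cache.get_by_id("semantic", ids[0], scope="agent-7")

print(cache.get_hot_memories(limit=2))       # ids[0] first, with the highest access count
print(cache.invalidate_by_scope("agent-7"))  # 3 entries removed
print(cache.get_stats().hit_rate)
```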