feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, content-hash-based fingerprinting, and dynamic token limiting.
- Refactor methods for scalability and better error handling.
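As a rough illustration of the settings-driven approach described above: only `cache_memory_max_items` appears in the diff below; the other field names, defaults, and the `truncate` helper are hypothetical stand-ins for the configurable truncation behavior, not the actual implementation.

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class ContextSettings:
    """Illustrative settings object replacing hard-coded limits."""
    cache_memory_max_items: int = 1000      # was hard-coded in ContextCache
    truncation_strategy: str = "middle"     # hypothetical: "start" | "middle" | "end"
    relevance_threshold: float = 0.0        # hypothetical relevance cutoff


def truncate(text: str, max_chars: int, strategy: str = "middle") -> str:
    """Middle truncation keeps the head and tail of the text around an ellipsis."""
    if len(text) <= max_chars:
        return text
    if strategy == "middle":
        half = (max_chars - 3) // 2
        return text[:half] + "..." + text[-(max_chars - 3 - half):]
    return text[:max_chars]
```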

Tests: Updated all affected components with additional test cases.
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions


@@ -54,7 +54,7 @@ class ContextCache:
         # In-memory fallback cache when Redis unavailable
         self._memory_cache: dict[str, tuple[str, float]] = {}
-        self._max_memory_items = 1000
+        self._max_memory_items = self._settings.cache_memory_max_items

     def set_redis(self, redis: "Redis") -> None:
         """Set Redis connection."""
@@ -100,7 +100,7 @@ class ContextCache:
         Compute a fingerprint for a context assembly request.

         The fingerprint is based on:
-        - Context content and metadata
+        - Context content hash and metadata (not full content for performance)
         - Query string
         - Target model
@@ -112,12 +112,13 @@ class ContextCache:
         Returns:
             32-character hex fingerprint
         """
-        # Build a deterministic representation
+        # Build a deterministic representation using content hashes for performance
+        # This avoids JSON serializing potentially large content strings
         context_data = []
         for ctx in contexts:
             context_data.append({
                 "type": ctx.get_type().value,
-                "content": ctx.content,
+                "content_hash": self._hash_content(ctx.content),  # Hash instead of full content
                 "source": ctx.source,
                 "priority": ctx.priority,  # Already an int
             })
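The fingerprinting change above can be sketched as follows. The diff does not show `_hash_content`'s algorithm, so SHA-256 is an assumption here, and the plain dicts stand in for the real context objects; only the "32-character hex fingerprint" contract comes from the docstring.

```python
import hashlib
import json


def _hash_content(content: str) -> str:
    # Assumption: a stable cryptographic hash; the real method may differ.
    # Hashing once up front avoids serializing large content into the fingerprint.
    return hashlib.sha256(content.encode("utf-8")).hexdigest()


def fingerprint(contexts: list[dict], query: str, model: str) -> str:
    """Deterministic 32-character hex fingerprint for a context assembly request."""
    context_data = [
        {
            "type": ctx["type"],
            "content_hash": _hash_content(ctx["content"]),  # hash instead of full content
            "source": ctx["source"],
            "priority": ctx["priority"],
        }
        for ctx in contexts
    ]
    payload = json.dumps(
        {"contexts": context_data, "query": query, "model": model},
        sort_keys=True,               # deterministic key order
        separators=(",", ":"),        # stable, compact encoding
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()[:32]
```

Because only the per-context hashes enter the JSON payload, the serialization cost is bounded by the number of contexts rather than their total content size.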