feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions
--- a/backend/app/services/context/engine.py
+++ b/backend/app/services/context/engine.py
@@ -214,6 +214,7 @@ class ContextEngine:
            contexts.extend(custom_contexts)

        # Check cache if enabled
+        fingerprint: str | None = None
        if use_cache and self._cache.is_enabled:
            fingerprint = self._cache.compute_fingerprint(contexts, query, model)
            cached = await self._cache.get_assembled(fingerprint)
@@ -232,9 +233,8 @@ class ContextEngine:
            format_output=format_output,
        )

-        # Cache result if enabled
-        if use_cache and self._cache.is_enabled:
-            fingerprint = self._cache.compute_fingerprint(contexts, query, model)
+        # Cache result if enabled (reuse fingerprint computed above)
+        if use_cache and self._cache.is_enabled and fingerprint is not None:
            await self._cache.set_assembled(fingerprint, result)

        return result
@@ -275,7 +275,8 @@ class ContextEngine:
            )

            contexts = []
-            for chunk in result.data.get("results", []):
+            results = result.data.get("results", []) if isinstance(result.data, dict) else []
+            for chunk in results:
                contexts.append(
                    KnowledgeContext(
                        content=chunk.get("content", ""),