feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -214,6 +214,7 @@ class ContextEngine:
contexts.extend(custom_contexts)
# Check cache if enabled
+fingerprint: str | None = None
if use_cache and self._cache.is_enabled:
fingerprint = self._cache.compute_fingerprint(contexts, query, model)
cached = await self._cache.get_assembled(fingerprint)
@@ -232,9 +233,8 @@ class ContextEngine:
format_output=format_output,
)
-# Cache result if enabled
-if use_cache and self._cache.is_enabled:
-fingerprint = self._cache.compute_fingerprint(contexts, query, model)
+# Cache result if enabled (reuse fingerprint computed above)
+if use_cache and self._cache.is_enabled and fingerprint is not None:
await self._cache.set_assembled(fingerprint, result)
return result
@@ -275,7 +275,8 @@ class ContextEngine:
)
contexts = []
-for chunk in result.data.get("results", []):
+results = result.data.get("results", []) if isinstance(result.data, dict) else []
+for chunk in results:
contexts.append(
KnowledgeContext(
content=chunk.get("content", ""),