feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context-hash-based caching, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -9,6 +9,7 @@ import logging
import re
from typing import TYPE_CHECKING, Any
from ..config import ContextSettings, get_context_settings
from ..types import BaseContext, KnowledgeContext
from .base import BaseScorer
@@ -32,7 +33,9 @@ class RelevanceScorer(BaseScorer):
self,
mcp_manager: "MCPClientManager | None" = None,
weight: float = 1.0,
keyword_fallback_weight: float = 0.5,
keyword_fallback_weight: float | None = None,
semantic_max_chars: int | None = None,
settings: ContextSettings | None = None,
) -> None:
"""
Initialize relevance scorer.
@@ -40,11 +43,25 @@ class RelevanceScorer(BaseScorer):
Args:
mcp_manager: MCP manager for Knowledge Base calls
weight: Scorer weight for composite scoring
keyword_fallback_weight: Max score for keyword-based fallback
keyword_fallback_weight: Max score for keyword-based fallback (overrides settings)
semantic_max_chars: Max content length for semantic similarity (overrides settings)
settings: Context settings (uses global if None)
"""
super().__init__(weight)
self._settings = settings or get_context_settings()
self._mcp = mcp_manager
self._keyword_fallback_weight = keyword_fallback_weight
# Use provided values or fall back to settings
self._keyword_fallback_weight = (
keyword_fallback_weight
if keyword_fallback_weight is not None
else self._settings.relevance_keyword_fallback_weight
)
self._semantic_max_chars = (
semantic_max_chars
if semantic_max_chars is not None
else self._settings.relevance_semantic_max_chars
)
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
"""Set MCP manager for semantic scoring."""
@@ -112,11 +129,11 @@ class RelevanceScorer(BaseScorer):
tool="compute_similarity",
args={
"text1": query,
"text2": context.content[:2000], # Limit content length
"text2": context.content[: self._semantic_max_chars], # Limit content length
},
)
if result.success and result.data:
if result.success and isinstance(result.data, dict):
similarity = result.data.get("similarity")
if similarity is not None:
return self.normalize_score(float(similarity))
@@ -171,7 +188,7 @@ class RelevanceScorer(BaseScorer):
**kwargs: Any,
) -> list[float]:
"""
Score multiple contexts.
Score multiple contexts in parallel.
Args:
contexts: Contexts to score
@@ -181,8 +198,10 @@ class RelevanceScorer(BaseScorer):
Returns:
List of scores (same order as input)
"""
scores = []
for context in contexts:
score = await self.score(context, query, **kwargs)
scores.append(score)
return scores
import asyncio
if not contexts:
return []
tasks = [self.score(context, query, **kwargs) for context in contexts]
return await asyncio.gather(*tasks)