feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context-hash-based caching, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -9,6 +9,7 @@ import logging
import re
from typing import TYPE_CHECKING, Any
from ..config import ContextSettings, get_context_settings
from ..types import BaseContext, KnowledgeContext
from .base import BaseScorer
@@ -32,7 +33,9 @@ class RelevanceScorer(BaseScorer):
self,
mcp_manager: "MCPClientManager | None" = None,
weight: float = 1.0,
keyword_fallback_weight: float = 0.5,
keyword_fallback_weight: float | None = None,
semantic_max_chars: int | None = None,
settings: ContextSettings | None = None,
) -> None:
"""
Initialize relevance scorer.
@@ -40,11 +43,25 @@ class RelevanceScorer(BaseScorer):
Args:
mcp_manager: MCP manager for Knowledge Base calls
weight: Scorer weight for composite scoring
keyword_fallback_weight: Max score for keyword-based fallback
keyword_fallback_weight: Max score for keyword-based fallback (overrides settings)
semantic_max_chars: Max content length for semantic similarity (overrides settings)
settings: Context settings (uses global if None)
"""
super().__init__(weight)
self._settings = settings or get_context_settings()
self._mcp = mcp_manager
self._keyword_fallback_weight = keyword_fallback_weight
# Use provided values or fall back to settings
self._keyword_fallback_weight = (
keyword_fallback_weight
if keyword_fallback_weight is not None
else self._settings.relevance_keyword_fallback_weight
)
self._semantic_max_chars = (
semantic_max_chars
if semantic_max_chars is not None
else self._settings.relevance_semantic_max_chars
)
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
"""Set MCP manager for semantic scoring."""
@@ -112,11 +129,11 @@ class RelevanceScorer(BaseScorer):
tool="compute_similarity",
args={
"text1": query,
"text2": context.content[:2000], # Limit content length
"text2": context.content[: self._semantic_max_chars], # Limit content length
},
)
if result.success and result.data:
if result.success and isinstance(result.data, dict):
similarity = result.data.get("similarity")
if similarity is not None:
return self.normalize_score(float(similarity))
@@ -171,7 +188,7 @@ class RelevanceScorer(BaseScorer):
**kwargs: Any,
) -> list[float]:
"""
Score multiple contexts.
Score multiple contexts in parallel.
Args:
contexts: Contexts to score
@@ -181,8 +198,10 @@ class RelevanceScorer(BaseScorer):
Returns:
List of scores (same order as input)
"""
scores = []
for context in contexts:
score = await self.score(context, query, **kwargs)
scores.append(score)
return scores
import asyncio
if not contexts:
return []
tasks = [self.score(context, query, **kwargs) for context in contexts]
return await asyncio.gather(*tasks)