chore(context): refactor for consistency, optimize formatting, and simplify logic

- Cleaned up unnecessary comments in `__all__` definitions for better readability.
- Adjusted indentation and formatting across modules for improved clarity (e.g., wrapping long lines, logical grouping).
- Simplified conditional expressions and inline comments for context scoring and ranking.
- Annotated mutable class-level defaults with `ClassVar` for type safety (e.g., `DEFAULT_TYPE_BONUSES`).
- Removed unused imports and ensured consistent usage across test files.
- Updated `test_score_not_cached_on_context` to clarify caching behavior.
- Improved truncation strategy logic and marker handling.
2026-01-04 15:23:14 +01:00
parent 9e54f16e56
commit 2bea057fb1
26 changed files with 226 additions and 273 deletions


@@ -92,7 +92,9 @@ class CompositeScorer:
         # Per-context locks to prevent race conditions during parallel scoring
         # Uses WeakValueDictionary so locks are garbage collected when not in use
-        self._context_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
+        self._context_locks: WeakValueDictionary[str, asyncio.Lock] = (
+            WeakValueDictionary()
+        )
         self._locks_lock = asyncio.Lock()  # Lock to protect _context_locks access

     def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
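
For reference, a minimal sketch of how the lock helper referenced below could work; `_get_context_lock` is not part of this diff, so the body here is an assumption:

    async def _get_context_lock(self, context_id: str) -> asyncio.Lock:
        # Hypothetical implementation; the real helper is outside this diff.
        # _locks_lock serializes access so two coroutines cannot create
        # separate locks for the same context id.
        async with self._locks_lock:
            lock = self._context_locks.get(context_id)
            if lock is None:
                lock = asyncio.Lock()
                # The WeakValueDictionary lets the lock be garbage collected
                # once no caller holds a strong reference to it.
                self._context_locks[context_id] = lock
            return lock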
@@ -207,17 +209,14 @@ class CompositeScorer:
             ScoredContext with all scores
         """
         # Get lock for this specific context to prevent race conditions
         # within concurrent scoring operations for the same query
         context_lock = await self._get_context_lock(context.id)
         async with context_lock:
-            # Check if context already has a score (inside lock to prevent races)
-            if context._score is not None:
-                return ScoredContext(
-                    context=context,
-                    composite_score=context._score,
-                )
-
             # Compute individual scores in parallel
+            # Note: We do NOT cache scores on the context because scores are
+            # query-dependent. Caching without considering the query would
+            # return incorrect scores for different queries.
             relevance_task = self._relevance_scorer.score(context, query, **kwargs)
             recency_task = self._recency_scorer.score(context, query, **kwargs)
             priority_task = self._priority_scorer.score(context, query, **kwargs)
@@ -240,9 +239,6 @@ class CompositeScorer:
             else:
                 composite = 0.0

-            # Cache the score on the context (now safe - inside lock)
-            context._score = composite
             return ScoredContext(
                 context=context,
                 composite_score=composite,
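
The reasoning behind dropping the cache, illustrated with a hedged example (hypothetical queries and values):

    # The same context generally scores differently per query, so a
    # query-agnostic cache on the context object returns stale results:
    scored_a = await scorer.score_with_details(ctx, query="database migrations")
    scored_b = await scorer.score_with_details(ctx, query="frontend styling")
    # Before this change, the first call would set ctx._score and the second
    # call would return it unchanged, even though the query differs.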
@@ -271,9 +267,7 @@ class CompositeScorer:
             List of ScoredContext (same order as input)
         """
         if parallel:
-            tasks = [
-                self.score_with_details(ctx, query, **kwargs) for ctx in contexts
-            ]
+            tasks = [self.score_with_details(ctx, query, **kwargs) for ctx in contexts]
             return await asyncio.gather(*tasks)
         else:
             results = []
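
The sequential branch is cut off by the hunk boundary; it presumably continues along these lines (a sketch, not the actual code):

    results = []
    for ctx in contexts:
        # Awaiting each context in turn keeps scoring strictly sequential.
        results.append(await self.score_with_details(ctx, query, **kwargs))
    return results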


@@ -4,7 +4,7 @@ Priority Scorer for Context Management.
 Scores context based on assigned priority levels.
 """

-from typing import Any
+from typing import Any, ClassVar

 from ..types import BaseContext, ContextType
 from .base import BaseScorer
@@ -19,11 +19,11 @@ class PriorityScorer(BaseScorer):
"""
# Default priority bonuses by context type
DEFAULT_TYPE_BONUSES: dict[ContextType, float] = {
ContextType.SYSTEM: 0.2, # System prompts get a boost
ContextType.TASK: 0.15, # Current task is important
ContextType.TOOL: 0.1, # Recent tool results matter
ContextType.KNOWLEDGE: 0.0, # Knowledge scored by relevance
DEFAULT_TYPE_BONUSES: ClassVar[dict[ContextType, float]] = {
ContextType.SYSTEM: 0.2, # System prompts get a boost
ContextType.TASK: 0.15, # Current task is important
ContextType.TOOL: 0.1, # Recent tool results matter
ContextType.KNOWLEDGE: 0.0, # Knowledge scored by relevance
ContextType.CONVERSATION: 0.0, # Conversation scored by recency
}
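
A minimal illustration of what the `ClassVar` annotation buys, assuming standard `typing` semantics:

    from typing import ClassVar

    class Example:
        # Annotated as ClassVar: type checkers treat this as a shared class
        # constant rather than a per-instance attribute, and linters such as
        # ruff (rule RUF012) stop flagging the mutable default.
        BONUSES: ClassVar[dict[str, float]] = {"system": 0.2}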


@@ -85,7 +85,10 @@ class RelevanceScorer(BaseScorer):
             Relevance score between 0.0 and 1.0
         """
         # 1. Check for pre-computed relevance score
-        if isinstance(context, KnowledgeContext) and context.relevance_score is not None:
+        if (
+            isinstance(context, KnowledgeContext)
+            and context.relevance_score is not None
+        ):
             return self.normalize_score(context.relevance_score)

         # 2. Check metadata for score
@@ -95,14 +98,19 @@ class RelevanceScorer(BaseScorer):
if "score" in context.metadata:
return self.normalize_score(context.metadata["score"])
# 3. Try MCP-based semantic similarity
# 3. Try MCP-based semantic similarity (if compute_similarity tool is available)
# Note: This requires the knowledge-base MCP server to implement compute_similarity
if self._mcp is not None:
try:
score = await self._compute_semantic_similarity(context, query)
if score is not None:
return score
except Exception as e:
logger.debug(f"Semantic scoring failed, using fallback: {e}")
# Log at debug level since this is expected if compute_similarity
# tool is not implemented in the Knowledge Base server
logger.debug(
f"Semantic scoring unavailable, using keyword fallback: {e}"
)
# 4. Fall back to keyword matching
return self._compute_keyword_score(context, query)
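
The keyword fallback in step 4 is referenced but not shown; a plausible sketch (the real `_compute_keyword_score` may differ):

    def _compute_keyword_score(self, context: BaseContext, query: str) -> float:
        # Fraction of query terms that appear in the content, in [0.0, 1.0].
        query_terms = set(query.lower().split())
        if not query_terms:
            return 0.0
        content_terms = set(context.content.lower().split())
        return len(query_terms & content_terms) / len(query_terms)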
@@ -122,6 +130,9 @@ class RelevanceScorer(BaseScorer):
         Returns:
             Similarity score or None if unavailable
         """
+        if self._mcp is None:
+            return None
+
         try:
             # Use Knowledge Base's search capability to compute similarity
             result = await self._mcp.call_tool(
@@ -129,7 +140,9 @@ class RelevanceScorer(BaseScorer):
tool="compute_similarity",
args={
"text1": query,
"text2": context.content[: self._semantic_max_chars], # Limit content length
"text2": context.content[
: self._semantic_max_chars
], # Limit content length
},
)
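
The handling of `result` falls outside this hunk; assuming the tool returns a payload like `{"similarity": <float>}`, the continuation might look like:

    # Hypothetical continuation; the payload shape is an assumption.
    if result and "similarity" in result:
        return self.normalize_score(result["similarity"])
    return None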