feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, hash-based context fingerprinting, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -8,6 +8,7 @@ import asyncio
import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from weakref import WeakValueDictionary
from ..config import ContextSettings, get_context_settings
from ..types import BaseContext
@@ -89,6 +90,11 @@ class CompositeScorer:
self._recency_scorer = RecencyScorer(weight=self._recency_weight)
self._priority_scorer = PriorityScorer(weight=self._priority_weight)
# Per-context locks to prevent race conditions during parallel scoring
# Uses WeakValueDictionary so locks are garbage collected when not in use
self._context_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
self._locks_lock = asyncio.Lock() # Lock to protect _context_locks access
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
    """Attach the MCP client manager used for semantic relevance scoring."""
    # The relevance scorer owns the semantic-scoring integration, so the
    # manager is simply forwarded to it.
    target = self._relevance_scorer
    target.set_mcp_manager(mcp_manager)
@@ -128,6 +134,38 @@ class CompositeScorer:
self._priority_weight = max(0.0, min(1.0, priority))
self._priority_scorer.weight = self._priority_weight
async def _get_context_lock(self, context_id: str) -> asyncio.Lock:
"""
Get or create a lock for a specific context.
Thread-safe access to per-context locks prevents race conditions
when the same context is scored concurrently.
Args:
context_id: The context ID to get a lock for
Returns:
asyncio.Lock for the context
"""
# Fast path: check if lock exists without acquiring main lock
if context_id in self._context_locks:
lock = self._context_locks.get(context_id)
if lock is not None:
return lock
# Slow path: create lock while holding main lock
async with self._locks_lock:
# Double-check after acquiring lock
if context_id in self._context_locks:
lock = self._context_locks.get(context_id)
if lock is not None:
return lock
# Create new lock
new_lock = asyncio.Lock()
self._context_locks[context_id] = new_lock
return new_lock
async def score(
self,
context: BaseContext,
@@ -157,6 +195,9 @@ class CompositeScorer:
"""
Compute composite score with individual scores.
Uses per-context locking to prevent race conditions when the same
context is scored concurrently in parallel scoring operations.
Args:
context: Context to score
query: Query to score against
@@ -165,46 +206,50 @@ class CompositeScorer:
Returns:
ScoredContext with all scores
"""
# Check if context already has a score
if context._score is not None:
return ScoredContext(
context=context,
composite_score=context._score,
# Get lock for this specific context to prevent race conditions
context_lock = await self._get_context_lock(context.id)
async with context_lock:
# Check if context already has a score (inside lock to prevent races)
if context._score is not None:
return ScoredContext(
context=context,
composite_score=context._score,
)
# Compute individual scores in parallel
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
recency_task = self._recency_scorer.score(context, query, **kwargs)
priority_task = self._priority_scorer.score(context, query, **kwargs)
relevance_score, recency_score, priority_score = await asyncio.gather(
relevance_task, recency_task, priority_task
)
# Compute individual scores in parallel
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
recency_task = self._recency_scorer.score(context, query, **kwargs)
priority_task = self._priority_scorer.score(context, query, **kwargs)
# Compute weighted composite
total_weight = (
self._relevance_weight + self._recency_weight + self._priority_weight
)
relevance_score, recency_score, priority_score = await asyncio.gather(
relevance_task, recency_task, priority_task
)
if total_weight > 0:
composite = (
relevance_score * self._relevance_weight
+ recency_score * self._recency_weight
+ priority_score * self._priority_weight
) / total_weight
else:
composite = 0.0
# Compute weighted composite
total_weight = (
self._relevance_weight + self._recency_weight + self._priority_weight
)
# Cache the score on the context (now safe - inside lock)
context._score = composite
if total_weight > 0:
composite = (
relevance_score * self._relevance_weight
+ recency_score * self._recency_weight
+ priority_score * self._priority_weight
) / total_weight
else:
composite = 0.0
# Cache the score on the context
context._score = composite
return ScoredContext(
context=context,
composite_score=composite,
relevance_score=relevance_score,
recency_score=recency_score,
priority_score=priority_score,
)
return ScoredContext(
context=context,
composite_score=composite,
relevance_score=relevance_score,
recency_score=recency_score,
priority_score=priority_score,
)
async def score_batch(
self,