feat(context): enhance timeout handling, tenant isolation, and budget management

- Added timeout enforcement for token counting, scoring, and compression with detailed error handling.
- Introduced tenant isolation in context caching using project and agent identifiers.
- Enhanced budget management with stricter checks for critical context overspending and buffer limitations.
- Optimized per-context locking with cleanup to prevent memory leaks in concurrent environments.
- Updated default assembly timeout settings for improved performance and reliability.
- Improved XML escaping in Claude adapter for safety against injection attacks.
- Standardized token estimation using model-specific ratios.
2026-01-04 15:52:50 +01:00
parent 2bea057fb1
commit 1628eacf2b
10 changed files with 271 additions and 175 deletions
--- a/backend/app/services/context/scoring/composite.py
+++ b/backend/app/services/context/scoring/composite.py
@@ -6,9 +6,9 @@ Combines multiple scoring strategies with configurable weights.

 import asyncio
 import logging
+import time
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
-from weakref import WeakValueDictionary

 from ..config import ContextSettings, get_context_settings
 from ..types import BaseContext
@@ -91,11 +91,11 @@ class CompositeScorer:
        self._priority_scorer = PriorityScorer(weight=self._priority_weight)

        # Per-context locks to prevent race conditions during parallel scoring
-        # Uses WeakValueDictionary so locks are garbage collected when not in use
-        self._context_locks: WeakValueDictionary[str, asyncio.Lock] = (
-            WeakValueDictionary()
-        )
+        # Uses dict with (lock, last_used_time) tuples for cleanup
+        self._context_locks: dict[str, tuple[asyncio.Lock, float]] = {}
        self._locks_lock = asyncio.Lock()  # Lock to protect _context_locks access
+        self._max_locks = 1000  # Maximum locks to keep (prevent memory growth)
+        self._lock_ttl = 60.0  # Seconds before a lock can be cleaned up

    def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
        """Set MCP manager for semantic scoring."""
@@ -141,7 +141,8 @@ class CompositeScorer:
        Get or create a lock for a specific context.

        Thread-safe access to per-context locks prevents race conditions
-        when the same context is scored concurrently.
+        when the same context is scored concurrently. Includes automatic
+        cleanup of old locks to prevent memory growth.

        Args:
            context_id: The context ID to get a lock for
@@ -149,25 +150,78 @@ class CompositeScorer:
        Returns:
            asyncio.Lock for the context
        """
+        now = time.time()
+
        # Fast path: check if lock exists without acquiring main lock
-        if context_id in self._context_locks:
-            lock = self._context_locks.get(context_id)
-            if lock is not None:
+        # NOTE: We only READ here - no writes to avoid race conditions
+        # with cleanup. The timestamp will be updated in the slow path
+        # if the lock is still valid.
+        lock_entry = self._context_locks.get(context_id)
+        if lock_entry is not None:
+            lock, _ = lock_entry
+            # Return the lock but defer timestamp update to avoid race
+            # The lock is still valid; timestamp update is best-effort
+            return lock
+
+        # Slow path: create lock or update timestamp while holding main lock
+        async with self._locks_lock:
+            # Double-check after acquiring lock - entry may have been
+            # created by another coroutine or deleted by cleanup
+            lock_entry = self._context_locks.get(context_id)
+            if lock_entry is not None:
+                lock, _ = lock_entry
+                # Safe to update timestamp here since we hold the lock
+                self._context_locks[context_id] = (lock, now)
                return lock

-        # Slow path: create lock while holding main lock
-        async with self._locks_lock:
-            # Double-check after acquiring lock
-            if context_id in self._context_locks:
-                lock = self._context_locks.get(context_id)
-                if lock is not None:
-                    return lock
+            # Cleanup old locks if we have too many
+            if len(self._context_locks) >= self._max_locks:
+                self._cleanup_old_locks(now)

            # Create new lock
            new_lock = asyncio.Lock()
-            self._context_locks[context_id] = new_lock
+            self._context_locks[context_id] = (new_lock, now)
            return new_lock

+    def _cleanup_old_locks(self, now: float) -> None:
+        """
+        Evict stale per-context locks from _context_locks.
+
+        Called while holding _locks_lock. Drops unheld locks whose timestamp is
+        older than _lock_ttl; if still at capacity, evicts oldest unheld ones too.
+
+        Args:
+            now: Current timestamp; the TTL cutoff is now - _lock_ttl
+        """
+        cutoff = now - self._lock_ttl
+        to_remove = []
+
+        for context_id, (lock, last_used) in self._context_locks.items():
+            # Stale = timestamp before the cutoff AND lock not currently held
+            if last_used < cutoff and not lock.locked():
+                to_remove.append(context_id)
+
+        # TTL pass would not bring the table below _max_locks: also evict by age
+        if len(self._context_locks) - len(to_remove) >= self._max_locks:
+            # Order entries by stored timestamp, oldest first
+            sorted_entries = sorted(
+                self._context_locks.items(),
+                key=lambda x: x[1][1],  # Sort by last_used time
+            )
+            # Mark unheld locks until half of all entries are slated for removal
+            target_remove = len(self._context_locks) // 2
+            for context_id, (lock, _) in sorted_entries:
+                if len(to_remove) >= target_remove:
+                    break
+                if context_id not in to_remove and not lock.locked():
+                    to_remove.append(context_id)
+
+        for context_id in to_remove:
+            del self._context_locks[context_id]
+
+        if to_remove:
+            logger.debug(f"Cleaned up {len(to_remove)} context locks")
+
    async def score(
        self,
        context: BaseContext,