feat(context): enhance timeout handling, tenant isolation, and budget management

- Added timeout enforcement for token counting, scoring, and compression with detailed error handling.
- Introduced tenant isolation in context caching using project and agent identifiers.
- Enhanced budget management with stricter checks for critical context overspending and buffer limitations.
- Optimized per-context locking with cleanup to prevent memory leaks in concurrent environments.
- Updated default assembly timeout settings for improved performance and reliability.
- Improved XML escaping in Claude adapter for safety against injection attacks.
- Standardized token estimation using model-specific ratios.
This commit is contained in:
2026-01-04 15:52:50 +01:00
parent 2bea057fb1
commit 1628eacf2b
10 changed files with 271 additions and 175 deletions

View File

@@ -6,9 +6,9 @@ Combines multiple scoring strategies with configurable weights.
import asyncio
import logging
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from weakref import WeakValueDictionary
from ..config import ContextSettings, get_context_settings
from ..types import BaseContext
@@ -91,11 +91,11 @@ class CompositeScorer:
self._priority_scorer = PriorityScorer(weight=self._priority_weight)
# Per-context locks to prevent race conditions during parallel scoring
# Uses WeakValueDictionary so locks are garbage collected when not in use
self._context_locks: WeakValueDictionary[str, asyncio.Lock] = (
WeakValueDictionary()
)
# Uses dict with (lock, last_used_time) tuples for cleanup
self._context_locks: dict[str, tuple[asyncio.Lock, float]] = {}
self._locks_lock = asyncio.Lock() # Lock to protect _context_locks access
self._max_locks = 1000 # Maximum locks to keep (prevent memory growth)
self._lock_ttl = 60.0 # Seconds before a lock can be cleaned up
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
"""Set MCP manager for semantic scoring."""
@@ -141,7 +141,8 @@ class CompositeScorer:
Get or create a lock for a specific context.
Thread-safe access to per-context locks prevents race conditions
when the same context is scored concurrently.
when the same context is scored concurrently. Includes automatic
cleanup of old locks to prevent memory growth.
Args:
context_id: The context ID to get a lock for
@@ -149,25 +150,78 @@ class CompositeScorer:
Returns:
asyncio.Lock for the context
"""
now = time.time()
# Fast path: check if lock exists without acquiring main lock
if context_id in self._context_locks:
lock = self._context_locks.get(context_id)
if lock is not None:
# NOTE: We only READ here - no writes to avoid race conditions
# with cleanup. The timestamp will be updated in the slow path
# if the lock is still valid.
lock_entry = self._context_locks.get(context_id)
if lock_entry is not None:
lock, _ = lock_entry
# Return the lock but defer timestamp update to avoid race
# The lock is still valid; timestamp update is best-effort
return lock
# Slow path: create lock or update timestamp while holding main lock
async with self._locks_lock:
# Double-check after acquiring lock - entry may have been
# created by another coroutine or deleted by cleanup
lock_entry = self._context_locks.get(context_id)
if lock_entry is not None:
lock, _ = lock_entry
# Safe to update timestamp here since we hold the lock
self._context_locks[context_id] = (lock, now)
return lock
# Slow path: create lock while holding main lock
async with self._locks_lock:
# Double-check after acquiring lock
if context_id in self._context_locks:
lock = self._context_locks.get(context_id)
if lock is not None:
return lock
# Cleanup old locks if we have too many
if len(self._context_locks) >= self._max_locks:
self._cleanup_old_locks(now)
# Create new lock
new_lock = asyncio.Lock()
self._context_locks[context_id] = new_lock
self._context_locks[context_id] = (new_lock, now)
return new_lock
def _cleanup_old_locks(self, now: float) -> None:
"""
Remove old locks that haven't been used recently.
Called while holding _locks_lock. Removes locks older than _lock_ttl,
but only if they're not currently held.
Args:
now: Current timestamp for age calculation
"""
cutoff = now - self._lock_ttl
to_remove = []
for context_id, (lock, last_used) in self._context_locks.items():
# Only remove if old AND not currently held
if last_used < cutoff and not lock.locked():
to_remove.append(context_id)
# Remove oldest 50% if still over limit after TTL filtering
if len(self._context_locks) - len(to_remove) >= self._max_locks:
# Sort by last used time and mark oldest for removal
sorted_entries = sorted(
self._context_locks.items(),
key=lambda x: x[1][1], # Sort by last_used time
)
# Remove oldest 50% that aren't locked
target_remove = len(self._context_locks) // 2
for context_id, (lock, _) in sorted_entries:
if len(to_remove) >= target_remove:
break
if context_id not in to_remove and not lock.locked():
to_remove.append(context_id)
for context_id in to_remove:
del self._context_locks[context_id]
if to_remove:
logger.debug(f"Cleaned up {len(to_remove)} context locks")
async def score(
self,
context: BaseContext,