feat(context): enhance timeout handling, tenant isolation, and budget management

- Added timeout enforcement for token counting, scoring, and compression, with detailed error handling.
- Introduced tenant isolation in context caching using project and agent identifiers.
- Enhanced budget management with stricter checks for critical-context overspending and buffer limitations.
- Optimized per-context locking with cleanup to prevent memory leaks in concurrent environments.
- Updated default assembly timeout settings for improved performance and reliability.
- Improved XML escaping in the Claude adapter for safety against injection attacks.
- Standardized token estimation using model-specific ratios.
2026-01-04 15:52:50 +01:00
parent 2bea057fb1
commit 1628eacf2b
10 changed files with 271 additions and 175 deletions
--- a/backend/app/services/context/prioritization/ranker.py
+++ b/backend/app/services/context/prioritization/ranker.py
@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any

 from ..budget import TokenBudget, TokenCalculator
 from ..config import ContextSettings, get_context_settings
+from ..exceptions import BudgetExceededError
 from ..scoring.composite import CompositeScorer, ScoredContext
 from ..types import BaseContext, ContextPriority

@@ -127,6 +128,9 @@ class ContextRanker:
        excluded: list[ScoredContext] = []
        total_tokens = 0

+        # Calculate the usable budget (total minus reserved portions)
+        usable_budget = budget.total - budget.response_reserve - budget.buffer
+
        # First, try to fit required contexts
        for sc in required:
            token_count = sc.context.token_count or 0
@@ -137,7 +141,20 @@ class ContextRanker:
                selected.append(sc)
                total_tokens += token_count
            else:
-                # Force-fit CRITICAL contexts if needed
+                # Force-fit CRITICAL contexts if needed, but check total budget first
+                if total_tokens + token_count > usable_budget:
+                    # Even CRITICAL contexts cannot exceed total model context window
+                    raise BudgetExceededError(
+                        message=(
+                            f"CRITICAL contexts exceed total budget. "
+                            f"Context '{sc.context.source}' ({token_count} tokens) "
+                            f"would exceed usable budget of {usable_budget} tokens."
+                        ),
+                        allocated=usable_budget,
+                        requested=total_tokens + token_count,
+                        context_type="CRITICAL_OVERFLOW",
+                    )
+
                budget.allocate(context_type, token_count, force=True)
                selected.append(sc)
                total_tokens += token_count