feat(context): enhance timeout handling, tenant isolation, and budget management

- Added timeout enforcement for token counting, scoring, and compression with detailed error handling.
- Introduced tenant isolation in context caching using project and agent identifiers.
- Enhanced budget management with stricter checks for critical context overspending and buffer limitations.
- Optimized per-context locking with cleanup to prevent memory leaks in concurrent environments.
- Updated default assembly timeout settings for improved performance and reliability.
- Improved XML escaping in Claude adapter for safety against injection attacks.
- Standardized token estimation using model-specific ratios.
This commit is contained in:
2026-01-04 15:52:50 +01:00
parent 2bea057fb1
commit 1628eacf2b
10 changed files with 271 additions and 175 deletions

View File

@@ -10,6 +10,7 @@ from typing import TYPE_CHECKING, Any
from ..budget import TokenBudget, TokenCalculator
from ..config import ContextSettings, get_context_settings
from ..exceptions import BudgetExceededError
from ..scoring.composite import CompositeScorer, ScoredContext
from ..types import BaseContext, ContextPriority
@@ -127,6 +128,9 @@ class ContextRanker:
excluded: list[ScoredContext] = []
total_tokens = 0
# Calculate the usable budget (total minus reserved portions)
usable_budget = budget.total - budget.response_reserve - budget.buffer
# First, try to fit required contexts
for sc in required:
token_count = sc.context.token_count or 0
@@ -137,7 +141,20 @@ class ContextRanker:
selected.append(sc)
total_tokens += token_count
else:
# Force-fit CRITICAL contexts if needed
# Force-fit CRITICAL contexts if needed, but check total budget first
if total_tokens + token_count > usable_budget:
# Even CRITICAL contexts cannot exceed total model context window
raise BudgetExceededError(
message=(
f"CRITICAL contexts exceed total budget. "
f"Context '{sc.context.source}' ({token_count} tokens) "
f"would exceed usable budget of {usable_budget} tokens."
),
allocated=usable_budget,
requested=total_tokens + token_count,
context_type="CRITICAL_OVERFLOW",
)
budget.allocate(context_type, token_count, force=True)
selected.append(sc)
total_tokens += token_count