feat(context): enhance timeout handling, tenant isolation, and budget management

- Added timeout enforcement for token counting, scoring, and compression with detailed error handling.
- Introduced tenant isolation in context caching using project and agent identifiers.
- Enhanced budget management with stricter checks for critical context overspending and buffer limitations.
- Optimized per-context locking with cleanup to prevent memory leaks in concurrent environments.
- Updated default assembly timeout settings for improved performance and reliability.
- Improved XML escaping in Claude adapter for safety against injection attacks.
- Standardized token estimation using model-specific ratios.
This commit is contained in:
2026-01-04 15:52:50 +01:00
parent 2bea057fb1
commit 1628eacf2b
10 changed files with 271 additions and 175 deletions

View File

@@ -19,6 +19,40 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
def _estimate_tokens(text: str, model: str | None = None) -> int:
"""
Estimate token count using model-specific character ratios.
Module-level function for reuse across classes. Uses the same ratios
as TokenCalculator for consistency.
Args:
text: Text to estimate tokens for
model: Optional model name for model-specific ratios
Returns:
Estimated token count (minimum 1)
"""
# Model-specific character ratios (chars per token)
model_ratios = {
"claude": 3.5,
"gpt-4": 4.0,
"gpt-3.5": 4.0,
"gemini": 4.0,
}
default_ratio = 4.0
ratio = default_ratio
if model:
model_lower = model.lower()
for model_prefix, model_ratio in model_ratios.items():
if model_prefix in model_lower:
ratio = model_ratio
break
return max(1, int(len(text) / ratio))
@dataclass
class TruncationResult:
"""Result of truncation operation."""
@@ -284,8 +318,8 @@ class TruncationStrategy:
if self._calculator is not None:
return await self._calculator.count_tokens(text, model)
# Fallback estimation
return max(1, len(text) // 4)
# Fallback estimation with model-specific ratios
return _estimate_tokens(text, model)
class ContextCompressor:
@@ -415,4 +449,5 @@ class ContextCompressor:
"""Count tokens using calculator or estimation."""
if self._calculator is not None:
return await self._calculator.count_tokens(text, model)
return max(1, len(text) // 4)
# Use model-specific estimation for consistency
return _estimate_tokens(text, model)