- Added timeout enforcement for token counting, scoring, and compression with detailed error handling.
- Introduced tenant isolation in context caching using project and agent identifiers.
- Enhanced budget management with stricter checks for critical context overspending and buffer limitations.
- Optimized per-context locking with cleanup to prevent memory leaks in concurrent environments.
- Updated default assembly timeout settings for improved performance and reliability.
- Improved XML escaping in the Claude adapter for safety against injection attacks.
- Standardized token estimation using model-specific ratios.
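The tenant-isolation change described above lives outside this settings module; as a rough sketch of how a tenant-scoped cache key could be composed from these settings (the `build_cache_key` function and the `project_id`/`agent_id` parameters are illustrative assumptions, not names from this repository):

```python
# Sketch only: build_cache_key(), project_id, and agent_id are illustrative names.
import hashlib


def build_cache_key(settings, project_id: str, agent_id: str, query: str) -> str:
    """Scope cached contexts to a single tenant (project + agent)."""
    digest = hashlib.sha256(query.encode("utf-8")).hexdigest()[:16]
    return f"{settings.cache_prefix}:{project_id}:{agent_id}:{digest}"


# Example: build_cache_key(get_context_settings(), "proj-1", "agent-7", "deploy steps")
# -> "ctx:proj-1:agent-7:<sha256 prefix>"
```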
"""
|
|
Context Management Engine Configuration.
|
|
|
|
Provides Pydantic settings for context assembly,
|
|
token budget allocation, and caching.
|
|
"""
|
|
|
|
import threading
|
|
from functools import lru_cache
|
|
from typing import Any
|
|
|
|
from pydantic import Field, field_validator, model_validator
|
|
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class ContextSettings(BaseSettings):
    """
    Configuration for the Context Management Engine.

    All settings can be overridden via environment variables
    with the CTX_ prefix.
    """

    # Budget allocation percentages (must sum to 1.0)
    budget_system: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for system prompts (5%)",
    )
    budget_task: float = Field(
        default=0.10,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for task context (10%)",
    )
    budget_knowledge: float = Field(
        default=0.40,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for RAG/knowledge (40%)",
    )
    budget_conversation: float = Field(
        default=0.20,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for conversation history (20%)",
    )
    budget_tools: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for tool descriptions (5%)",
    )
    budget_response: float = Field(
        default=0.15,
        ge=0.0,
        le=1.0,
        description="Percentage reserved for response (15%)",
    )
    budget_buffer: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage buffer for safety margin (5%)",
    )

    # Scoring weights
    scoring_relevance_weight: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Weight for relevance scoring",
    )
    scoring_recency_weight: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Weight for recency scoring",
    )
    scoring_priority_weight: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        description="Weight for priority scoring",
    )

    # Recency decay settings
    recency_decay_hours: float = Field(
        default=24.0,
        gt=0.0,
        description="Hours until recency score decays to 50%",
    )
    recency_max_age_hours: float = Field(
        default=168.0,
        gt=0.0,
        description="Hours until context is considered stale (7 days)",
    )

    # Compression settings
    compression_threshold: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        description="Compress when budget usage exceeds this percentage",
    )
    truncation_marker: str = Field(
        default="\n\n[...content truncated...]\n\n",
        description="Marker text to insert where content was truncated",
    )
    truncation_preserve_ratio: float = Field(
        default=0.7,
        ge=0.1,
        le=0.9,
        description="Ratio of content to preserve from start in middle truncation (0.7 = 70% start, 30% end)",
    )
    truncation_min_content_length: int = Field(
        default=100,
        ge=10,
        le=1000,
        description="Minimum content length in characters before truncation applies",
    )
    summary_model_group: str = Field(
        default="fast",
        description="Model group to use for summarization",
    )

    # Caching settings
    cache_enabled: bool = Field(
        default=True,
        description="Enable Redis caching for assembled contexts",
    )
    cache_ttl_seconds: int = Field(
        default=3600,
        ge=60,
        le=86400,
        description="Cache TTL in seconds (1 hour default, max 24 hours)",
    )
    cache_prefix: str = Field(
        default="ctx",
        description="Redis key prefix for context cache",
    )
    cache_memory_max_items: int = Field(
        default=1000,
        ge=100,
        le=100000,
        description="Maximum items in memory fallback cache when Redis unavailable",
    )

    # Performance settings
    max_assembly_time_ms: int = Field(
        default=2000,
        ge=10,
        le=30000,
        description="Maximum time for context assembly in milliseconds. "
        "Should be high enough to accommodate MCP calls for knowledge retrieval.",
    )
    parallel_scoring: bool = Field(
        default=True,
        description="Score contexts in parallel for better performance",
    )
    max_parallel_scores: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum number of contexts to score in parallel",
    )

    # Knowledge retrieval settings
    knowledge_search_type: str = Field(
        default="hybrid",
        description="Default search type for knowledge retrieval",
    )
    knowledge_max_results: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum knowledge chunks to retrieve",
    )
    knowledge_min_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Minimum relevance score for knowledge",
    )

    # Relevance scoring settings
    relevance_keyword_fallback_weight: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Maximum score for keyword-based fallback scoring (when semantic unavailable)",
    )
    relevance_semantic_max_chars: int = Field(
        default=2000,
        ge=100,
        le=10000,
        description="Maximum content length in chars for semantic similarity computation",
    )

    # Diversity/ranking settings
    diversity_max_per_source: int = Field(
        default=3,
        ge=1,
        le=20,
        description="Maximum contexts from the same source in diversity reranking",
    )

    # Conversation history settings
    conversation_max_turns: int = Field(
        default=20,
        ge=1,
        le=100,
        description="Maximum conversation turns to include",
    )
    conversation_recent_priority: bool = Field(
        default=True,
        description="Prioritize recent conversation turns",
    )

    @field_validator("knowledge_search_type")
    @classmethod
    def validate_search_type(cls, v: str) -> str:
        """Validate search type is valid."""
        valid_types = {"semantic", "keyword", "hybrid"}
        if v not in valid_types:
            raise ValueError(f"search_type must be one of: {valid_types}")
        return v

    @model_validator(mode="after")
    def validate_budget_allocation(self) -> "ContextSettings":
        """Validate that budget percentages sum to 1.0."""
        total = (
            self.budget_system
            + self.budget_task
            + self.budget_knowledge
            + self.budget_conversation
            + self.budget_tools
            + self.budget_response
            + self.budget_buffer
        )
        # Allow small floating point error
        if abs(total - 1.0) > 0.001:
            raise ValueError(
                f"Budget percentages must sum to 1.0, got {total:.3f}. "
                f"Current allocation: system={self.budget_system}, task={self.budget_task}, "
                f"knowledge={self.budget_knowledge}, conversation={self.budget_conversation}, "
                f"tools={self.budget_tools}, response={self.budget_response}, buffer={self.budget_buffer}"
            )
        return self

    @model_validator(mode="after")
    def validate_scoring_weights(self) -> "ContextSettings":
        """Validate that scoring weights sum to 1.0."""
        total = (
            self.scoring_relevance_weight
            + self.scoring_recency_weight
            + self.scoring_priority_weight
        )
        # Allow small floating point error
        if abs(total - 1.0) > 0.001:
            raise ValueError(
                f"Scoring weights must sum to 1.0, got {total:.3f}. "
                f"Current weights: relevance={self.scoring_relevance_weight}, "
                f"recency={self.scoring_recency_weight}, priority={self.scoring_priority_weight}"
            )
        return self

    def get_budget_allocation(self) -> dict[str, float]:
        """Get budget allocation as a dictionary."""
        return {
            "system": self.budget_system,
            "task": self.budget_task,
            "knowledge": self.budget_knowledge,
            "conversation": self.budget_conversation,
            "tools": self.budget_tools,
            "response": self.budget_response,
            "buffer": self.budget_buffer,
        }

    def get_scoring_weights(self) -> dict[str, float]:
        """Get scoring weights as a dictionary."""
        return {
            "relevance": self.scoring_relevance_weight,
            "recency": self.scoring_recency_weight,
            "priority": self.scoring_priority_weight,
        }

    def to_dict(self) -> dict[str, Any]:
        """Convert settings to dictionary for logging/debugging."""
        return {
            "budget": self.get_budget_allocation(),
            "scoring": self.get_scoring_weights(),
            "compression": {
                "threshold": self.compression_threshold,
                "summary_model_group": self.summary_model_group,
                "truncation_marker": self.truncation_marker,
                "truncation_preserve_ratio": self.truncation_preserve_ratio,
                "truncation_min_content_length": self.truncation_min_content_length,
            },
            "cache": {
                "enabled": self.cache_enabled,
                "ttl_seconds": self.cache_ttl_seconds,
                "prefix": self.cache_prefix,
                "memory_max_items": self.cache_memory_max_items,
            },
            "performance": {
                "max_assembly_time_ms": self.max_assembly_time_ms,
                "parallel_scoring": self.parallel_scoring,
                "max_parallel_scores": self.max_parallel_scores,
            },
            "knowledge": {
                "search_type": self.knowledge_search_type,
                "max_results": self.knowledge_max_results,
                "min_score": self.knowledge_min_score,
            },
            "relevance": {
                "keyword_fallback_weight": self.relevance_keyword_fallback_weight,
                "semantic_max_chars": self.relevance_semantic_max_chars,
            },
            "diversity": {
                "max_per_source": self.diversity_max_per_source,
            },
            "conversation": {
                "max_turns": self.conversation_max_turns,
                "recent_priority": self.conversation_recent_priority,
            },
        }

    model_config = {
        "env_prefix": "CTX_",
        "env_file": "../.env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }


# Thread-safe singleton pattern
_settings: ContextSettings | None = None
_settings_lock = threading.Lock()


def get_context_settings() -> ContextSettings:
    """
    Get the global ContextSettings instance.

    Thread-safe with double-checked locking pattern.

    Returns:
        ContextSettings instance
    """
    global _settings
    if _settings is None:
        with _settings_lock:
            if _settings is None:
                _settings = ContextSettings()
    return _settings


def reset_context_settings() -> None:
    """
    Reset the global settings instance.

    Primarily used for testing.
    """
    global _settings
    with _settings_lock:
        _settings = None


@lru_cache(maxsize=1)
def get_default_settings() -> ContextSettings:
    """
    Get default settings (cached).

    Use this for read-only access to defaults.
    For mutable access, use get_context_settings().
    """
    return ContextSettings()
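

# --- Example usage (illustrative addition, not part of the original module) ---
# Any field can be overridden through a CTX_-prefixed environment variable before
# the first get_context_settings() call; the overrides below keep the budget
# percentages summing to 1.0 so the model validator still passes.
if __name__ == "__main__":
    import json
    import os

    os.environ["CTX_BUDGET_KNOWLEDGE"] = "0.45"
    os.environ["CTX_BUDGET_CONVERSATION"] = "0.15"

    reset_context_settings()  # drop any previously built singleton
    settings = get_context_settings()
    print(json.dumps(settings.to_dict(), indent=2))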