"""
Context Management Engine Configuration.

Provides Pydantic settings for context assembly,
token budget allocation, and caching.
"""

import threading
from functools import lru_cache
from typing import Any

from pydantic import Field, field_validator, model_validator
from pydantic_settings import BaseSettings

# Absolute tolerance used when checking that a group of fractions sums to 1.0.
# Absorbs floating-point rounding error in the summation.
_SUM_TOLERANCE: float = 0.001


class ContextSettings(BaseSettings):
    """
    Configuration for the Context Management Engine.

    All settings can be overridden via environment variables
    with the CTX_ prefix (see ``model_config`` at the end of the class).

    Cross-field invariants enforced at construction time:
        * the seven ``budget_*`` fractions must sum to 1.0
        * the three ``scoring_*_weight`` values must sum to 1.0
    Both checks allow an absolute error of ``_SUM_TOLERANCE``.
    """

    # Budget allocation, expressed as fractions of the total budget
    # (each in [0.0, 1.0]; together they must sum to 1.0)
    budget_system: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for system prompts (5%)",
    )
    budget_task: float = Field(
        default=0.10,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for task context (10%)",
    )
    budget_knowledge: float = Field(
        default=0.40,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for RAG/knowledge (40%)",
    )
    budget_conversation: float = Field(
        default=0.20,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for conversation history (20%)",
    )
    budget_tools: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for tool descriptions (5%)",
    )
    budget_response: float = Field(
        default=0.15,
        ge=0.0,
        le=1.0,
        description="Percentage reserved for response (15%)",
    )
    budget_buffer: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage buffer for safety margin (5%)",
    )

    # Scoring weights (each in [0.0, 1.0]; together they must sum to 1.0)
    scoring_relevance_weight: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Weight for relevance scoring",
    )
    scoring_recency_weight: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Weight for recency scoring",
    )
    scoring_priority_weight: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        description="Weight for priority scoring",
    )

    # Recency decay settings
    recency_decay_hours: float = Field(
        default=24.0,
        gt=0.0,
        description="Hours until recency score decays to 50%",
    )
    recency_max_age_hours: float = Field(
        default=168.0,
        gt=0.0,
        description="Hours until context is considered stale (7 days)",
    )

    # Compression settings
    compression_threshold: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        description="Compress when budget usage exceeds this percentage",
    )
    truncation_marker: str = Field(
        default="\n\n[...content truncated...]\n\n",
        description="Marker text to insert where content was truncated",
    )
    truncation_preserve_ratio: float = Field(
        default=0.7,
        ge=0.1,
        le=0.9,
        description="Ratio of content to preserve from start in middle truncation (0.7 = 70% start, 30% end)",
    )
    truncation_min_content_length: int = Field(
        default=100,
        ge=10,
        le=1000,
        description="Minimum content length in characters before truncation applies",
    )
    summary_model_group: str = Field(
        default="fast",
        description="Model group to use for summarization",
    )

    # Caching settings
    cache_enabled: bool = Field(
        default=True,
        description="Enable Redis caching for assembled contexts",
    )
    cache_ttl_seconds: int = Field(
        default=3600,
        ge=60,
        le=86400,
        description="Cache TTL in seconds (1 hour default, max 24 hours)",
    )
    cache_prefix: str = Field(
        default="ctx",
        description="Redis key prefix for context cache",
    )
    cache_memory_max_items: int = Field(
        default=1000,
        ge=100,
        le=100000,
        description="Maximum items in memory fallback cache when Redis unavailable",
    )

    # Performance settings
    max_assembly_time_ms: int = Field(
        default=2000,
        ge=10,
        le=30000,
        description="Maximum time for context assembly in milliseconds. "
        "Should be high enough to accommodate MCP calls for knowledge retrieval.",
    )
    parallel_scoring: bool = Field(
        default=True,
        description="Score contexts in parallel for better performance",
    )
    max_parallel_scores: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum number of contexts to score in parallel",
    )

    # Knowledge retrieval settings
    knowledge_search_type: str = Field(
        default="hybrid",
        description="Default search type for knowledge retrieval",
    )
    knowledge_max_results: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum knowledge chunks to retrieve",
    )
    knowledge_min_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Minimum relevance score for knowledge",
    )

    # Relevance scoring settings
    relevance_keyword_fallback_weight: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Maximum score for keyword-based fallback scoring (when semantic unavailable)",
    )
    relevance_semantic_max_chars: int = Field(
        default=2000,
        ge=100,
        le=10000,
        description="Maximum content length in chars for semantic similarity computation",
    )

    # Diversity/ranking settings
    diversity_max_per_source: int = Field(
        default=3,
        ge=1,
        le=20,
        description="Maximum contexts from the same source in diversity reranking",
    )

    # Conversation history settings
    conversation_max_turns: int = Field(
        default=20,
        ge=1,
        le=100,
        description="Maximum conversation turns to include",
    )
    conversation_recent_priority: bool = Field(
        default=True,
        description="Prioritize recent conversation turns",
    )

    @field_validator("knowledge_search_type")
    @classmethod
    def validate_search_type(cls, v: str) -> str:
        """
        Validate that the search type is one of the supported values.

        Args:
            v: Candidate value for ``knowledge_search_type``.

        Returns:
            The value unchanged when it is valid.

        Raises:
            ValueError: If ``v`` is not "semantic", "keyword" or "hybrid".
        """
        valid_types = {"semantic", "keyword", "hybrid"}
        if v not in valid_types:
            # Sort the options: interpolating the set directly would list them
            # in an order that can change between interpreter runs (string
            # hash randomization), making the message unstable in logs/tests.
            raise ValueError(f"search_type must be one of: {sorted(valid_types)}")
        return v

    @model_validator(mode="after")
    def validate_budget_allocation(self) -> "ContextSettings":
        """
        Validate that budget percentages sum to 1.0.

        Returns:
            The validated instance.

        Raises:
            ValueError: If the seven ``budget_*`` fields differ from 1.0
                in total by more than ``_SUM_TOLERANCE``.
        """
        total = (
            self.budget_system
            + self.budget_task
            + self.budget_knowledge
            + self.budget_conversation
            + self.budget_tools
            + self.budget_response
            + self.budget_buffer
        )
        # Allow small floating point error
        if abs(total - 1.0) > _SUM_TOLERANCE:
            raise ValueError(
                f"Budget percentages must sum to 1.0, got {total:.3f}. "
                f"Current allocation: system={self.budget_system}, task={self.budget_task}, "
                f"knowledge={self.budget_knowledge}, conversation={self.budget_conversation}, "
                f"tools={self.budget_tools}, response={self.budget_response}, buffer={self.budget_buffer}"
            )
        return self

    @model_validator(mode="after")
    def validate_scoring_weights(self) -> "ContextSettings":
        """
        Validate that scoring weights sum to 1.0.

        Returns:
            The validated instance.

        Raises:
            ValueError: If the three ``scoring_*_weight`` fields differ from
                1.0 in total by more than ``_SUM_TOLERANCE``.
        """
        total = (
            self.scoring_relevance_weight
            + self.scoring_recency_weight
            + self.scoring_priority_weight
        )
        # Allow small floating point error
        if abs(total - 1.0) > _SUM_TOLERANCE:
            raise ValueError(
                f"Scoring weights must sum to 1.0, got {total:.3f}. "
                f"Current weights: relevance={self.scoring_relevance_weight}, "
                f"recency={self.scoring_recency_weight}, priority={self.scoring_priority_weight}"
            )
        return self

    def get_budget_allocation(self) -> dict[str, float]:
        """Get budget allocation as a dictionary keyed by budget category."""
        return {
            "system": self.budget_system,
            "task": self.budget_task,
            "knowledge": self.budget_knowledge,
            "conversation": self.budget_conversation,
            "tools": self.budget_tools,
            "response": self.budget_response,
            "buffer": self.budget_buffer,
        }

    def get_scoring_weights(self) -> dict[str, float]:
        """Get scoring weights as a dictionary keyed by scoring dimension."""
        return {
            "relevance": self.scoring_relevance_weight,
            "recency": self.scoring_recency_weight,
            "priority": self.scoring_priority_weight,
        }

    def to_dict(self) -> dict[str, Any]:
        """
        Convert settings to a nested dictionary for logging/debugging.

        Returns:
            A dict grouped by concern ("budget", "scoring", "compression",
            "cache", "performance", "knowledge", "relevance", "diversity",
            "conversation"). The recency decay fields are not included.
        """
        return {
            "budget": self.get_budget_allocation(),
            "scoring": self.get_scoring_weights(),
            "compression": {
                "threshold": self.compression_threshold,
                "summary_model_group": self.summary_model_group,
                "truncation_marker": self.truncation_marker,
                "truncation_preserve_ratio": self.truncation_preserve_ratio,
                "truncation_min_content_length": self.truncation_min_content_length,
            },
            "cache": {
                "enabled": self.cache_enabled,
                "ttl_seconds": self.cache_ttl_seconds,
                "prefix": self.cache_prefix,
                "memory_max_items": self.cache_memory_max_items,
            },
            "performance": {
                "max_assembly_time_ms": self.max_assembly_time_ms,
                "parallel_scoring": self.parallel_scoring,
                "max_parallel_scores": self.max_parallel_scores,
            },
            "knowledge": {
                "search_type": self.knowledge_search_type,
                "max_results": self.knowledge_max_results,
                "min_score": self.knowledge_min_score,
            },
            "relevance": {
                "keyword_fallback_weight": self.relevance_keyword_fallback_weight,
                "semantic_max_chars": self.relevance_semantic_max_chars,
            },
            "diversity": {
                "max_per_source": self.diversity_max_per_source,
            },
            "conversation": {
                "max_turns": self.conversation_max_turns,
                "recent_priority": self.conversation_recent_priority,
            },
        }

    # NOTE(review): "env_file" is a relative path; presumably it is resolved
    # against the process working directory rather than this file's location.
    # Confirm against the deployment layout.
    model_config = {
        "env_prefix": "CTX_",
        "env_file": "../.env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }


# Thread-safe singleton pattern
_settings: ContextSettings | None = None
_settings_lock = threading.Lock()


def get_context_settings() -> ContextSettings:
    """
    Get the global ContextSettings instance.

    Thread-safe with double-checked locking pattern: the lock is only
    taken while the instance has not been created yet.

    Returns:
        ContextSettings instance (never None)
    """
    global _settings
    # Work on a local snapshot. Re-reading the global for the return value
    # would allow a concurrent reset_context_settings() to slip in between
    # the assignment and the return, handing None back to the caller.
    settings = _settings
    if settings is None:
        with _settings_lock:
            settings = _settings
            if settings is None:
                settings = ContextSettings()
                _settings = settings
    return settings


def reset_context_settings() -> None:
    """
    Reset the global settings instance.

    The next call to get_context_settings() builds a fresh instance.
    Primarily used for testing.
    """
    global _settings
    with _settings_lock:
        _settings = None


@lru_cache(maxsize=1)
def get_default_settings() -> ContextSettings:
    """
    Get default settings (cached).

    Use this for read-only access to defaults.
    For mutable access, use get_context_settings().

    Note:
        The instance is built with ``ContextSettings()`` and no arguments,
        so it is subject to the same ``model_config`` (CTX_ environment
        prefix, env file) as any other instance. The cache is independent
        of reset_context_settings(); call
        ``get_default_settings.cache_clear()`` to force a rebuild.
    """
    return ContextSettings()