forked from cardosofelipe/fast-next-template
Implements the foundation for the Context Management Engine:

Types (backend/app/services/context/types/):
- BaseContext: Abstract base with ID, content, priority, scoring
- SystemContext: System prompts, personas, instructions
- KnowledgeContext: RAG results from Knowledge Base MCP
- ConversationContext: Chat history with role support
- TaskContext: Task/issue context with acceptance criteria
- ToolContext: Tool definitions and execution results
- AssembledContext: Final assembled context result

Configuration (config.py):
- Token budget allocation (system 5%, task 10%, knowledge 40%, etc.)
- Scoring weights (relevance 50%, recency 30%, priority 20%)
- Cache settings (TTL, prefix)
- Performance settings (max assembly time, parallel scoring)
- Environment variable overrides with CTX_ prefix

Exceptions (exceptions.py):
- ContextError: Base exception
- BudgetExceededError: Token budget violations
- TokenCountError: Token counting failures
- CompressionError: Compression failures
- AssemblyTimeoutError: Assembly timeout
- ScoringError, FormattingError, CacheError
- ContextNotFoundError, InvalidContextError

All 86 tests pass.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
329 lines
9.6 KiB
Python
329 lines
9.6 KiB
Python
"""
Context Management Engine Configuration.

Provides Pydantic settings for context assembly,
token budget allocation, and caching.
"""
|
|
|
|
import threading
|
|
from functools import lru_cache
|
|
from typing import Any
|
|
|
|
from pydantic import Field, field_validator, model_validator
|
|
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class ContextSettings(BaseSettings):
    """
    Configuration for the Context Management Engine.

    Groups the token budget allocation, scoring weights, recency decay,
    compression, caching, performance, knowledge retrieval and conversation
    history settings in one place.

    All settings can be overridden via environment variables
    with the CTX_ prefix (see ``model_config`` at the end of the class).

    Two cross-field rules are enforced after field validation:
    the seven ``budget_*`` values must sum to 1.0 and the three
    ``scoring_*_weight`` values must sum to 1.0 (both within 0.001).
    """

    # Budget allocation percentages (must sum to 1.0)
    budget_system: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for system prompts (5%)",
    )
    budget_task: float = Field(
        default=0.10,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for task context (10%)",
    )
    budget_knowledge: float = Field(
        default=0.40,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for RAG/knowledge (40%)",
    )
    budget_conversation: float = Field(
        default=0.20,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for conversation history (20%)",
    )
    budget_tools: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage of budget for tool descriptions (5%)",
    )
    budget_response: float = Field(
        default=0.15,
        ge=0.0,
        le=1.0,
        description="Percentage reserved for response (15%)",
    )
    budget_buffer: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Percentage buffer for safety margin (5%)",
    )

    # Scoring weights (must sum to 1.0)
    scoring_relevance_weight: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Weight for relevance scoring",
    )
    scoring_recency_weight: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Weight for recency scoring",
    )
    scoring_priority_weight: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        description="Weight for priority scoring",
    )

    # Recency decay settings
    recency_decay_hours: float = Field(
        default=24.0,
        gt=0.0,
        description="Hours until recency score decays to 50%",
    )
    recency_max_age_hours: float = Field(
        default=168.0,
        gt=0.0,
        description="Hours until context is considered stale (7 days)",
    )

    # Compression settings
    compression_threshold: float = Field(
        default=0.8,
        ge=0.0,
        le=1.0,
        description="Compress when budget usage exceeds this percentage",
    )
    truncation_suffix: str = Field(
        default="... [truncated]",
        description="Suffix to add when truncating content",
    )
    summary_model_group: str = Field(
        default="fast",
        description="Model group to use for summarization",
    )

    # Caching settings
    cache_enabled: bool = Field(
        default=True,
        description="Enable Redis caching for assembled contexts",
    )
    cache_ttl_seconds: int = Field(
        default=3600,
        ge=60,
        le=86400,
        description="Cache TTL in seconds (1 hour default, max 24 hours)",
    )
    cache_prefix: str = Field(
        default="ctx",
        description="Redis key prefix for context cache",
    )

    # Performance settings
    max_assembly_time_ms: int = Field(
        default=100,
        ge=10,
        le=5000,
        description="Maximum time for context assembly in milliseconds",
    )
    parallel_scoring: bool = Field(
        default=True,
        description="Score contexts in parallel for better performance",
    )
    max_parallel_scores: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum number of contexts to score in parallel",
    )

    # Knowledge retrieval settings
    knowledge_search_type: str = Field(
        default="hybrid",
        description="Default search type for knowledge retrieval",
    )
    knowledge_max_results: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum knowledge chunks to retrieve",
    )
    knowledge_min_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Minimum relevance score for knowledge",
    )

    # Conversation history settings
    conversation_max_turns: int = Field(
        default=20,
        ge=1,
        le=100,
        description="Maximum conversation turns to include",
    )
    conversation_recent_priority: bool = Field(
        default=True,
        description="Prioritize recent conversation turns",
    )

    @field_validator("knowledge_search_type")
    @classmethod
    def validate_search_type(cls, v: str) -> str:
        """
        Validate that the knowledge search type is one of the supported values.

        Args:
            v: Candidate value for ``knowledge_search_type``

        Returns:
            The value unchanged when it is "semantic", "keyword" or "hybrid"

        Raises:
            ValueError: If the value is not one of the supported types
        """
        valid_types = {"semantic", "keyword", "hybrid"}
        if v not in valid_types:
            # Sort before formatting: set iteration order is not stable across
            # processes, which would make the message differ from run to run.
            allowed = ", ".join(sorted(valid_types))
            raise ValueError(f"search_type must be one of: {allowed}")
        return v

    @model_validator(mode="after")
    def validate_budget_allocation(self) -> "ContextSettings":
        """
        Validate that budget percentages sum to 1.0.

        Returns:
            This settings instance, unchanged

        Raises:
            ValueError: If the seven budget_* values deviate from 1.0
                by more than 0.001
        """
        total = (
            self.budget_system
            + self.budget_task
            + self.budget_knowledge
            + self.budget_conversation
            + self.budget_tools
            + self.budget_response
            + self.budget_buffer
        )
        # Allow small floating point error
        if abs(total - 1.0) > 0.001:
            raise ValueError(
                f"Budget percentages must sum to 1.0, got {total:.3f}. "
                f"Current allocation: system={self.budget_system}, task={self.budget_task}, "
                f"knowledge={self.budget_knowledge}, conversation={self.budget_conversation}, "
                f"tools={self.budget_tools}, response={self.budget_response}, buffer={self.budget_buffer}"
            )
        return self

    @model_validator(mode="after")
    def validate_scoring_weights(self) -> "ContextSettings":
        """
        Validate that scoring weights sum to 1.0.

        Returns:
            This settings instance, unchanged

        Raises:
            ValueError: If the three scoring weights deviate from 1.0
                by more than 0.001
        """
        total = (
            self.scoring_relevance_weight
            + self.scoring_recency_weight
            + self.scoring_priority_weight
        )
        # Allow small floating point error
        if abs(total - 1.0) > 0.001:
            raise ValueError(
                f"Scoring weights must sum to 1.0, got {total:.3f}. "
                f"Current weights: relevance={self.scoring_relevance_weight}, "
                f"recency={self.scoring_recency_weight}, priority={self.scoring_priority_weight}"
            )
        return self

    def get_budget_allocation(self) -> dict[str, float]:
        """
        Get budget allocation as a dictionary.

        Returns:
            Mapping of budget category name (without the ``budget_`` prefix)
            to its configured fraction
        """
        return {
            "system": self.budget_system,
            "task": self.budget_task,
            "knowledge": self.budget_knowledge,
            "conversation": self.budget_conversation,
            "tools": self.budget_tools,
            "response": self.budget_response,
            "buffer": self.budget_buffer,
        }

    def get_scoring_weights(self) -> dict[str, float]:
        """
        Get scoring weights as a dictionary.

        Returns:
            Mapping with the keys "relevance", "recency" and "priority"
        """
        return {
            "relevance": self.scoring_relevance_weight,
            "recency": self.scoring_recency_weight,
            "priority": self.scoring_priority_weight,
        }

    def to_dict(self) -> dict[str, Any]:
        """
        Convert settings to dictionary for logging/debugging.

        The result is a grouped summary, not a full dump: the recency
        settings and ``truncation_suffix`` are not included.

        Returns:
            Nested dictionary with the groups "budget", "scoring",
            "compression", "cache", "performance", "knowledge"
            and "conversation"
        """
        return {
            "budget": self.get_budget_allocation(),
            "scoring": self.get_scoring_weights(),
            "compression": {
                "threshold": self.compression_threshold,
                "summary_model_group": self.summary_model_group,
            },
            "cache": {
                "enabled": self.cache_enabled,
                "ttl_seconds": self.cache_ttl_seconds,
                "prefix": self.cache_prefix,
            },
            "performance": {
                "max_assembly_time_ms": self.max_assembly_time_ms,
                "parallel_scoring": self.parallel_scoring,
                "max_parallel_scores": self.max_parallel_scores,
            },
            "knowledge": {
                "search_type": self.knowledge_search_type,
                "max_results": self.knowledge_max_results,
                "min_score": self.knowledge_min_score,
            },
            "conversation": {
                "max_turns": self.conversation_max_turns,
                "recent_priority": self.conversation_recent_priority,
            },
        }

    # Settings source configuration: CTX_-prefixed, case-insensitive variables;
    # unknown keys are ignored.
    # NOTE(review): "../.env" is a relative path - presumably resolved against
    # the process working directory; confirm against the deployment layout.
    model_config = {
        "env_prefix": "CTX_",
        "env_file": "../.env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
    }
|
|
|
|
|
|
# Module-wide settings singleton: the lock serializes creation and reset of
# the lazily built instance held in _settings (None until first requested).
_settings_lock = threading.Lock()
_settings: ContextSettings | None = None
|
|
|
|
|
|
def get_context_settings() -> ContextSettings:
    """
    Get the global ContextSettings instance.

    The instance is created lazily on first use. Creation happens while
    holding ``_settings_lock`` and the global is re-checked under the lock,
    so concurrent first callers share a single instance.

    Returns:
        ContextSettings instance (never None)

    Raises:
        Whatever ``ContextSettings()`` raises when the configured values
        fail validation.
    """
    global _settings
    # Snapshot the global once and return the snapshot: a concurrent
    # reset_context_settings() may set the global back to None between the
    # check and a second read, which would leak None to the caller.
    settings = _settings
    if settings is None:
        with _settings_lock:
            settings = _settings
            if settings is None:
                settings = ContextSettings()
                _settings = settings
    return settings
|
|
|
|
|
|
def reset_context_settings() -> None:
    """
    Discard the global settings instance.

    Sets the module-level instance back to None while holding
    ``_settings_lock``. Intended mainly for tests.
    """
    global _settings
    with _settings_lock:
        _settings = None
|
|
|
|
|
|
@lru_cache(maxsize=1)
def get_default_settings() -> ContextSettings:
    """
    Get a cached ContextSettings built without explicit arguments.

    The instance is created on the first call and reused afterwards.
    It is a separate object from the one returned by
    get_context_settings() and is not cleared by reset_context_settings().

    Use this for read-only access to defaults.
    For mutable access, use get_context_settings().
    """
    default_settings = ContextSettings()
    return default_settings
|