forked from cardosofelipe/fast-next-template
feat(context): Phase 1 - Foundation types, config and exceptions (#79)
Implements the foundation for Context Management Engine: Types (backend/app/services/context/types/): - BaseContext: Abstract base with ID, content, priority, scoring - SystemContext: System prompts, personas, instructions - KnowledgeContext: RAG results from Knowledge Base MCP - ConversationContext: Chat history with role support - TaskContext: Task/issue context with acceptance criteria - ToolContext: Tool definitions and execution results - AssembledContext: Final assembled context result Configuration (config.py): - Token budget allocation (system 5%, task 10%, knowledge 40%, etc.) - Scoring weights (relevance 50%, recency 30%, priority 20%) - Cache settings (TTL, prefix) - Performance settings (max assembly time, parallel scoring) - Environment variable overrides with CTX_ prefix Exceptions (exceptions.py): - ContextError: Base exception - BudgetExceededError: Token budget violations - TokenCountError: Token counting failures - CompressionError: Compression failures - AssemblyTimeoutError: Assembly timeout - ScoringError, FormattingError, CacheError - ContextNotFoundError, InvalidContextError All 86 tests pass. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
328
backend/app/services/context/config.py
Normal file
328
backend/app/services/context/config.py
Normal file
@@ -0,0 +1,328 @@
|
||||
"""
|
||||
Context Management Engine Configuration.
|
||||
|
||||
Provides Pydantic settings for context assembly,
|
||||
token budget allocation, and caching.
|
||||
"""
|
||||
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
from typing import Any
|
||||
|
||||
from pydantic import Field, field_validator, model_validator
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class ContextSettings(BaseSettings):
|
||||
"""
|
||||
Configuration for the Context Management Engine.
|
||||
|
||||
All settings can be overridden via environment variables
|
||||
with the CTX_ prefix.
|
||||
"""
|
||||
|
||||
# Budget allocation percentages (must sum to 1.0)
|
||||
budget_system: float = Field(
|
||||
default=0.05,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage of budget for system prompts (5%)",
|
||||
)
|
||||
budget_task: float = Field(
|
||||
default=0.10,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage of budget for task context (10%)",
|
||||
)
|
||||
budget_knowledge: float = Field(
|
||||
default=0.40,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage of budget for RAG/knowledge (40%)",
|
||||
)
|
||||
budget_conversation: float = Field(
|
||||
default=0.20,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage of budget for conversation history (20%)",
|
||||
)
|
||||
budget_tools: float = Field(
|
||||
default=0.05,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage of budget for tool descriptions (5%)",
|
||||
)
|
||||
budget_response: float = Field(
|
||||
default=0.15,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage reserved for response (15%)",
|
||||
)
|
||||
budget_buffer: float = Field(
|
||||
default=0.05,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Percentage buffer for safety margin (5%)",
|
||||
)
|
||||
|
||||
# Scoring weights
|
||||
scoring_relevance_weight: float = Field(
|
||||
default=0.5,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Weight for relevance scoring",
|
||||
)
|
||||
scoring_recency_weight: float = Field(
|
||||
default=0.3,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Weight for recency scoring",
|
||||
)
|
||||
scoring_priority_weight: float = Field(
|
||||
default=0.2,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Weight for priority scoring",
|
||||
)
|
||||
|
||||
# Recency decay settings
|
||||
recency_decay_hours: float = Field(
|
||||
default=24.0,
|
||||
gt=0.0,
|
||||
description="Hours until recency score decays to 50%",
|
||||
)
|
||||
recency_max_age_hours: float = Field(
|
||||
default=168.0,
|
||||
gt=0.0,
|
||||
description="Hours until context is considered stale (7 days)",
|
||||
)
|
||||
|
||||
# Compression settings
|
||||
compression_threshold: float = Field(
|
||||
default=0.8,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Compress when budget usage exceeds this percentage",
|
||||
)
|
||||
truncation_suffix: str = Field(
|
||||
default="... [truncated]",
|
||||
description="Suffix to add when truncating content",
|
||||
)
|
||||
summary_model_group: str = Field(
|
||||
default="fast",
|
||||
description="Model group to use for summarization",
|
||||
)
|
||||
|
||||
# Caching settings
|
||||
cache_enabled: bool = Field(
|
||||
default=True,
|
||||
description="Enable Redis caching for assembled contexts",
|
||||
)
|
||||
cache_ttl_seconds: int = Field(
|
||||
default=3600,
|
||||
ge=60,
|
||||
le=86400,
|
||||
description="Cache TTL in seconds (1 hour default, max 24 hours)",
|
||||
)
|
||||
cache_prefix: str = Field(
|
||||
default="ctx",
|
||||
description="Redis key prefix for context cache",
|
||||
)
|
||||
|
||||
# Performance settings
|
||||
max_assembly_time_ms: int = Field(
|
||||
default=100,
|
||||
ge=10,
|
||||
le=5000,
|
||||
description="Maximum time for context assembly in milliseconds",
|
||||
)
|
||||
parallel_scoring: bool = Field(
|
||||
default=True,
|
||||
description="Score contexts in parallel for better performance",
|
||||
)
|
||||
max_parallel_scores: int = Field(
|
||||
default=10,
|
||||
ge=1,
|
||||
le=50,
|
||||
description="Maximum number of contexts to score in parallel",
|
||||
)
|
||||
|
||||
# Knowledge retrieval settings
|
||||
knowledge_search_type: str = Field(
|
||||
default="hybrid",
|
||||
description="Default search type for knowledge retrieval",
|
||||
)
|
||||
knowledge_max_results: int = Field(
|
||||
default=10,
|
||||
ge=1,
|
||||
le=50,
|
||||
description="Maximum knowledge chunks to retrieve",
|
||||
)
|
||||
knowledge_min_score: float = Field(
|
||||
default=0.5,
|
||||
ge=0.0,
|
||||
le=1.0,
|
||||
description="Minimum relevance score for knowledge",
|
||||
)
|
||||
|
||||
# Conversation history settings
|
||||
conversation_max_turns: int = Field(
|
||||
default=20,
|
||||
ge=1,
|
||||
le=100,
|
||||
description="Maximum conversation turns to include",
|
||||
)
|
||||
conversation_recent_priority: bool = Field(
|
||||
default=True,
|
||||
description="Prioritize recent conversation turns",
|
||||
)
|
||||
|
||||
@field_validator("knowledge_search_type")
|
||||
@classmethod
|
||||
def validate_search_type(cls, v: str) -> str:
|
||||
"""Validate search type is valid."""
|
||||
valid_types = {"semantic", "keyword", "hybrid"}
|
||||
if v not in valid_types:
|
||||
raise ValueError(f"search_type must be one of: {valid_types}")
|
||||
return v
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_budget_allocation(self) -> "ContextSettings":
|
||||
"""Validate that budget percentages sum to 1.0."""
|
||||
total = (
|
||||
self.budget_system
|
||||
+ self.budget_task
|
||||
+ self.budget_knowledge
|
||||
+ self.budget_conversation
|
||||
+ self.budget_tools
|
||||
+ self.budget_response
|
||||
+ self.budget_buffer
|
||||
)
|
||||
# Allow small floating point error
|
||||
if abs(total - 1.0) > 0.001:
|
||||
raise ValueError(
|
||||
f"Budget percentages must sum to 1.0, got {total:.3f}. "
|
||||
f"Current allocation: system={self.budget_system}, task={self.budget_task}, "
|
||||
f"knowledge={self.budget_knowledge}, conversation={self.budget_conversation}, "
|
||||
f"tools={self.budget_tools}, response={self.budget_response}, buffer={self.budget_buffer}"
|
||||
)
|
||||
return self
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_scoring_weights(self) -> "ContextSettings":
|
||||
"""Validate that scoring weights sum to 1.0."""
|
||||
total = (
|
||||
self.scoring_relevance_weight
|
||||
+ self.scoring_recency_weight
|
||||
+ self.scoring_priority_weight
|
||||
)
|
||||
# Allow small floating point error
|
||||
if abs(total - 1.0) > 0.001:
|
||||
raise ValueError(
|
||||
f"Scoring weights must sum to 1.0, got {total:.3f}. "
|
||||
f"Current weights: relevance={self.scoring_relevance_weight}, "
|
||||
f"recency={self.scoring_recency_weight}, priority={self.scoring_priority_weight}"
|
||||
)
|
||||
return self
|
||||
|
||||
def get_budget_allocation(self) -> dict[str, float]:
|
||||
"""Get budget allocation as a dictionary."""
|
||||
return {
|
||||
"system": self.budget_system,
|
||||
"task": self.budget_task,
|
||||
"knowledge": self.budget_knowledge,
|
||||
"conversation": self.budget_conversation,
|
||||
"tools": self.budget_tools,
|
||||
"response": self.budget_response,
|
||||
"buffer": self.budget_buffer,
|
||||
}
|
||||
|
||||
def get_scoring_weights(self) -> dict[str, float]:
|
||||
"""Get scoring weights as a dictionary."""
|
||||
return {
|
||||
"relevance": self.scoring_relevance_weight,
|
||||
"recency": self.scoring_recency_weight,
|
||||
"priority": self.scoring_priority_weight,
|
||||
}
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert settings to dictionary for logging/debugging."""
|
||||
return {
|
||||
"budget": self.get_budget_allocation(),
|
||||
"scoring": self.get_scoring_weights(),
|
||||
"compression": {
|
||||
"threshold": self.compression_threshold,
|
||||
"summary_model_group": self.summary_model_group,
|
||||
},
|
||||
"cache": {
|
||||
"enabled": self.cache_enabled,
|
||||
"ttl_seconds": self.cache_ttl_seconds,
|
||||
"prefix": self.cache_prefix,
|
||||
},
|
||||
"performance": {
|
||||
"max_assembly_time_ms": self.max_assembly_time_ms,
|
||||
"parallel_scoring": self.parallel_scoring,
|
||||
"max_parallel_scores": self.max_parallel_scores,
|
||||
},
|
||||
"knowledge": {
|
||||
"search_type": self.knowledge_search_type,
|
||||
"max_results": self.knowledge_max_results,
|
||||
"min_score": self.knowledge_min_score,
|
||||
},
|
||||
"conversation": {
|
||||
"max_turns": self.conversation_max_turns,
|
||||
"recent_priority": self.conversation_recent_priority,
|
||||
},
|
||||
}
|
||||
|
||||
model_config = {
|
||||
"env_prefix": "CTX_",
|
||||
"env_file": "../.env",
|
||||
"env_file_encoding": "utf-8",
|
||||
"case_sensitive": False,
|
||||
"extra": "ignore",
|
||||
}
|
||||
|
||||
|
||||
# Thread-safe singleton pattern
|
||||
_settings: ContextSettings | None = None
|
||||
_settings_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_context_settings() -> ContextSettings:
|
||||
"""
|
||||
Get the global ContextSettings instance.
|
||||
|
||||
Thread-safe with double-checked locking pattern.
|
||||
|
||||
Returns:
|
||||
ContextSettings instance
|
||||
"""
|
||||
global _settings
|
||||
if _settings is None:
|
||||
with _settings_lock:
|
||||
if _settings is None:
|
||||
_settings = ContextSettings()
|
||||
return _settings
|
||||
|
||||
|
||||
def reset_context_settings() -> None:
|
||||
"""
|
||||
Reset the global settings instance.
|
||||
|
||||
Primarily used for testing.
|
||||
"""
|
||||
global _settings
|
||||
with _settings_lock:
|
||||
_settings = None
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_default_settings() -> ContextSettings:
|
||||
"""
|
||||
Get default settings (cached).
|
||||
|
||||
Use this for read-only access to defaults.
|
||||
For mutable access, use get_context_settings().
|
||||
"""
|
||||
return ContextSettings()
|
||||
Reference in New Issue
Block a user