""" Token Budget Allocator for Context Management. Manages token budget allocation across context types. """ from dataclasses import dataclass, field from typing import Any from ..config import ContextSettings, get_context_settings from ..exceptions import BudgetExceededError from ..types import ContextType @dataclass class TokenBudget: """ Token budget allocation and tracking. Tracks allocated tokens per context type and monitors usage to prevent overflows. """ # Total budget total: int # Allocated per type system: int = 0 task: int = 0 knowledge: int = 0 conversation: int = 0 tools: int = 0 response_reserve: int = 0 buffer: int = 0 # Usage tracking used: dict[str, int] = field(default_factory=dict) def __post_init__(self) -> None: """Initialize usage tracking.""" if not self.used: self.used = {ct.value: 0 for ct in ContextType} def get_allocation(self, context_type: ContextType | str) -> int: """ Get allocated tokens for a context type. Args: context_type: Context type to get allocation for Returns: Allocated token count """ if isinstance(context_type, ContextType): context_type = context_type.value allocation_map = { "system": self.system, "task": self.task, "knowledge": self.knowledge, "conversation": self.conversation, "tool": self.tools, } return allocation_map.get(context_type, 0) def get_used(self, context_type: ContextType | str) -> int: """ Get used tokens for a context type. Args: context_type: Context type to check Returns: Used token count """ if isinstance(context_type, ContextType): context_type = context_type.value return self.used.get(context_type, 0) def remaining(self, context_type: ContextType | str) -> int: """ Get remaining tokens for a context type. Args: context_type: Context type to check Returns: Remaining token count """ allocated = self.get_allocation(context_type) used = self.get_used(context_type) return max(0, allocated - used) def total_remaining(self) -> int: """ Get total remaining tokens across all types. Returns: Total remaining tokens """ total_used = sum(self.used.values()) usable = self.total - self.response_reserve - self.buffer return max(0, usable - total_used) def total_used(self) -> int: """ Get total used tokens. Returns: Total used tokens """ return sum(self.used.values()) def can_fit(self, context_type: ContextType | str, tokens: int) -> bool: """ Check if tokens fit within budget for a type. Args: context_type: Context type to check tokens: Number of tokens to fit Returns: True if tokens fit within remaining budget """ return tokens <= self.remaining(context_type) def allocate( self, context_type: ContextType | str, tokens: int, force: bool = False, ) -> bool: """ Allocate (use) tokens from a context type's budget. Args: context_type: Context type to allocate from tokens: Number of tokens to allocate force: If True, allow exceeding budget Returns: True if allocation succeeded Raises: BudgetExceededError: If tokens exceed budget and force=False """ if isinstance(context_type, ContextType): context_type = context_type.value if not force and not self.can_fit(context_type, tokens): raise BudgetExceededError( message=f"Token budget exceeded for {context_type}", allocated=self.get_allocation(context_type), requested=self.get_used(context_type) + tokens, context_type=context_type, ) self.used[context_type] = self.used.get(context_type, 0) + tokens return True def deallocate( self, context_type: ContextType | str, tokens: int, ) -> None: """ Deallocate (return) tokens to a context type's budget. Args: context_type: Context type to return to tokens: Number of tokens to return """ if isinstance(context_type, ContextType): context_type = context_type.value current = self.used.get(context_type, 0) self.used[context_type] = max(0, current - tokens) def reset(self) -> None: """Reset all usage tracking.""" self.used = {ct.value: 0 for ct in ContextType} def utilization(self, context_type: ContextType | str | None = None) -> float: """ Get budget utilization percentage. Args: context_type: Specific type or None for total Returns: Utilization as a fraction (0.0 to 1.0+) """ if context_type is None: usable = self.total - self.response_reserve - self.buffer if usable <= 0: return 0.0 return self.total_used() / usable allocated = self.get_allocation(context_type) if allocated <= 0: return 0.0 return self.get_used(context_type) / allocated def to_dict(self) -> dict[str, Any]: """Convert budget to dictionary.""" return { "total": self.total, "allocations": { "system": self.system, "task": self.task, "knowledge": self.knowledge, "conversation": self.conversation, "tools": self.tools, "response_reserve": self.response_reserve, "buffer": self.buffer, }, "used": dict(self.used), "remaining": {ct.value: self.remaining(ct) for ct in ContextType}, "total_used": self.total_used(), "total_remaining": self.total_remaining(), "utilization": round(self.utilization(), 3), } class BudgetAllocator: """ Budget allocator for context management. Creates token budgets based on configuration and model context window sizes. """ def __init__(self, settings: ContextSettings | None = None) -> None: """ Initialize budget allocator. Args: settings: Context settings (uses default if None) """ self._settings = settings or get_context_settings() def create_budget( self, total_tokens: int, custom_allocations: dict[str, float] | None = None, ) -> TokenBudget: """ Create a token budget with allocations. Args: total_tokens: Total available tokens custom_allocations: Optional custom allocation percentages Returns: TokenBudget with allocations set """ # Use custom or default allocations if custom_allocations: alloc = custom_allocations else: alloc = self._settings.get_budget_allocation() return TokenBudget( total=total_tokens, system=int(total_tokens * alloc.get("system", 0.05)), task=int(total_tokens * alloc.get("task", 0.10)), knowledge=int(total_tokens * alloc.get("knowledge", 0.40)), conversation=int(total_tokens * alloc.get("conversation", 0.20)), tools=int(total_tokens * alloc.get("tools", 0.05)), response_reserve=int(total_tokens * alloc.get("response", 0.15)), buffer=int(total_tokens * alloc.get("buffer", 0.05)), ) def adjust_budget( self, budget: TokenBudget, context_type: ContextType | str, adjustment: int, ) -> TokenBudget: """ Adjust a specific allocation in a budget. Takes tokens from buffer and adds to specified type. Args: budget: Budget to adjust context_type: Type to adjust adjustment: Positive to increase, negative to decrease Returns: Adjusted budget """ if isinstance(context_type, ContextType): context_type = context_type.value # Calculate adjustment (limited by buffer for increases, by current allocation for decreases) if adjustment > 0: # Taking from buffer - limited by available buffer actual_adjustment = min(adjustment, budget.buffer) budget.buffer -= actual_adjustment else: # Returning to buffer - limited by current allocation of target type current_allocation = budget.get_allocation(context_type) # Can't return more than current allocation actual_adjustment = max(adjustment, -current_allocation) # Add returned tokens back to buffer (adjustment is negative, so subtract) budget.buffer -= actual_adjustment # Apply to target type if context_type == "system": budget.system = max(0, budget.system + actual_adjustment) elif context_type == "task": budget.task = max(0, budget.task + actual_adjustment) elif context_type == "knowledge": budget.knowledge = max(0, budget.knowledge + actual_adjustment) elif context_type == "conversation": budget.conversation = max(0, budget.conversation + actual_adjustment) elif context_type == "tool": budget.tools = max(0, budget.tools + actual_adjustment) return budget def rebalance_budget( self, budget: TokenBudget, prioritize: list[ContextType] | None = None, ) -> TokenBudget: """ Rebalance budget based on actual usage. Moves unused allocations to prioritized types. Args: budget: Budget to rebalance prioritize: Types to prioritize (in order) Returns: Rebalanced budget """ if prioritize is None: prioritize = [ContextType.KNOWLEDGE, ContextType.TASK, ContextType.SYSTEM] # Calculate unused tokens per type unused: dict[str, int] = {} for ct in ContextType: remaining = budget.remaining(ct) if remaining > 0: unused[ct.value] = remaining # Calculate total reclaimable (excluding prioritized types) prioritize_values = {ct.value for ct in prioritize} reclaimable = sum( tokens for ct, tokens in unused.items() if ct not in prioritize_values ) # Redistribute to prioritized types that are near capacity for ct in prioritize: utilization = budget.utilization(ct) if utilization > 0.8: # Near capacity # Give more tokens from reclaimable pool bonus = min(reclaimable, budget.get_allocation(ct) // 2) self.adjust_budget(budget, ct, bonus) reclaimable -= bonus if reclaimable <= 0: break return budget def get_model_context_size(self, model: str) -> int: """ Get context window size for a model. Args: model: Model name Returns: Context window size in tokens """ # Common model context sizes context_sizes = { "claude-3-opus": 200000, "claude-3-sonnet": 200000, "claude-3-haiku": 200000, "claude-3-5-sonnet": 200000, "claude-3-5-haiku": 200000, "claude-opus-4": 200000, "gpt-4-turbo": 128000, "gpt-4": 8192, "gpt-4-32k": 32768, "gpt-4o": 128000, "gpt-4o-mini": 128000, "gpt-3.5-turbo": 16385, "gemini-1.5-pro": 2000000, "gemini-1.5-flash": 1000000, "gemini-2.0-flash": 1000000, "qwen-plus": 32000, "qwen-turbo": 8000, "deepseek-chat": 64000, "deepseek-reasoner": 64000, } # Check exact match first model_lower = model.lower() if model_lower in context_sizes: return context_sizes[model_lower] # Check prefix match for model_name, size in context_sizes.items(): if model_lower.startswith(model_name): return size # Default fallback return 8192 def create_budget_for_model( self, model: str, custom_allocations: dict[str, float] | None = None, ) -> TokenBudget: """ Create a budget based on model's context window. Args: model: Model name custom_allocations: Optional custom allocation percentages Returns: TokenBudget sized for the model """ context_size = self.get_model_context_size(model) return self.create_budget(context_size, custom_allocations)