feat(context): implement token budget management (Phase 2)

Add TokenCalculator with LLM Gateway integration for accurate token
counting with in-memory caching and fallback character-based estimation.
Implement TokenBudget for tracking allocations per context type with
budget enforcement, and BudgetAllocator for creating budgets based on
model context window sizes.

- TokenCalculator: MCP integration, caching, model-specific ratios
- TokenBudget: allocation tracking, can_fit/allocate/deallocate/reset
- BudgetAllocator: model context sizes, budget creation and adjustment
- 35 comprehensive tests covering all budget functionality

Part of #61 - Context Management Engine

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-04 02:13:23 +01:00
parent 22ecb5e989
commit dfa75e682e
5 changed files with 1277 additions and 0 deletions

View File

@@ -0,0 +1,433 @@
"""
Token Budget Allocator for Context Management.
Manages token budget allocation across context types.
"""
from dataclasses import dataclass, field
from typing import Any
from ..config import ContextSettings, get_context_settings
from ..exceptions import BudgetExceededError
from ..types import ContextType
@dataclass
class TokenBudget:
"""
Token budget allocation and tracking.
Tracks allocated tokens per context type and
monitors usage to prevent overflows.
"""
# Total budget
total: int
# Allocated per type
system: int = 0
task: int = 0
knowledge: int = 0
conversation: int = 0
tools: int = 0
response_reserve: int = 0
buffer: int = 0
# Usage tracking
used: dict[str, int] = field(default_factory=dict)
def __post_init__(self) -> None:
"""Initialize usage tracking."""
if not self.used:
self.used = {ct.value: 0 for ct in ContextType}
def get_allocation(self, context_type: ContextType | str) -> int:
"""
Get allocated tokens for a context type.
Args:
context_type: Context type to get allocation for
Returns:
Allocated token count
"""
if isinstance(context_type, ContextType):
context_type = context_type.value
allocation_map = {
"system": self.system,
"task": self.task,
"knowledge": self.knowledge,
"conversation": self.conversation,
"tool": self.tools,
}
return allocation_map.get(context_type, 0)
def get_used(self, context_type: ContextType | str) -> int:
"""
Get used tokens for a context type.
Args:
context_type: Context type to check
Returns:
Used token count
"""
if isinstance(context_type, ContextType):
context_type = context_type.value
return self.used.get(context_type, 0)
def remaining(self, context_type: ContextType | str) -> int:
"""
Get remaining tokens for a context type.
Args:
context_type: Context type to check
Returns:
Remaining token count
"""
allocated = self.get_allocation(context_type)
used = self.get_used(context_type)
return max(0, allocated - used)
def total_remaining(self) -> int:
"""
Get total remaining tokens across all types.
Returns:
Total remaining tokens
"""
total_used = sum(self.used.values())
usable = self.total - self.response_reserve - self.buffer
return max(0, usable - total_used)
def total_used(self) -> int:
"""
Get total used tokens.
Returns:
Total used tokens
"""
return sum(self.used.values())
def can_fit(self, context_type: ContextType | str, tokens: int) -> bool:
"""
Check if tokens fit within budget for a type.
Args:
context_type: Context type to check
tokens: Number of tokens to fit
Returns:
True if tokens fit within remaining budget
"""
return tokens <= self.remaining(context_type)
def allocate(
self,
context_type: ContextType | str,
tokens: int,
force: bool = False,
) -> bool:
"""
Allocate (use) tokens from a context type's budget.
Args:
context_type: Context type to allocate from
tokens: Number of tokens to allocate
force: If True, allow exceeding budget
Returns:
True if allocation succeeded
Raises:
BudgetExceededError: If tokens exceed budget and force=False
"""
if isinstance(context_type, ContextType):
context_type = context_type.value
if not force and not self.can_fit(context_type, tokens):
raise BudgetExceededError(
message=f"Token budget exceeded for {context_type}",
allocated=self.get_allocation(context_type),
requested=self.get_used(context_type) + tokens,
context_type=context_type,
)
self.used[context_type] = self.used.get(context_type, 0) + tokens
return True
def deallocate(
self,
context_type: ContextType | str,
tokens: int,
) -> None:
"""
Deallocate (return) tokens to a context type's budget.
Args:
context_type: Context type to return to
tokens: Number of tokens to return
"""
if isinstance(context_type, ContextType):
context_type = context_type.value
current = self.used.get(context_type, 0)
self.used[context_type] = max(0, current - tokens)
def reset(self) -> None:
"""Reset all usage tracking."""
self.used = {ct.value: 0 for ct in ContextType}
def utilization(self, context_type: ContextType | str | None = None) -> float:
"""
Get budget utilization percentage.
Args:
context_type: Specific type or None for total
Returns:
Utilization as a fraction (0.0 to 1.0+)
"""
if context_type is None:
usable = self.total - self.response_reserve - self.buffer
if usable <= 0:
return 0.0
return self.total_used() / usable
allocated = self.get_allocation(context_type)
if allocated <= 0:
return 0.0
return self.get_used(context_type) / allocated
def to_dict(self) -> dict[str, Any]:
"""Convert budget to dictionary."""
return {
"total": self.total,
"allocations": {
"system": self.system,
"task": self.task,
"knowledge": self.knowledge,
"conversation": self.conversation,
"tools": self.tools,
"response_reserve": self.response_reserve,
"buffer": self.buffer,
},
"used": dict(self.used),
"remaining": {
ct.value: self.remaining(ct) for ct in ContextType
},
"total_used": self.total_used(),
"total_remaining": self.total_remaining(),
"utilization": round(self.utilization(), 3),
}
class BudgetAllocator:
"""
Budget allocator for context management.
Creates token budgets based on configuration and
model context window sizes.
"""
def __init__(self, settings: ContextSettings | None = None) -> None:
"""
Initialize budget allocator.
Args:
settings: Context settings (uses default if None)
"""
self._settings = settings or get_context_settings()
def create_budget(
self,
total_tokens: int,
custom_allocations: dict[str, float] | None = None,
) -> TokenBudget:
"""
Create a token budget with allocations.
Args:
total_tokens: Total available tokens
custom_allocations: Optional custom allocation percentages
Returns:
TokenBudget with allocations set
"""
# Use custom or default allocations
if custom_allocations:
alloc = custom_allocations
else:
alloc = self._settings.get_budget_allocation()
return TokenBudget(
total=total_tokens,
system=int(total_tokens * alloc.get("system", 0.05)),
task=int(total_tokens * alloc.get("task", 0.10)),
knowledge=int(total_tokens * alloc.get("knowledge", 0.40)),
conversation=int(total_tokens * alloc.get("conversation", 0.20)),
tools=int(total_tokens * alloc.get("tools", 0.05)),
response_reserve=int(total_tokens * alloc.get("response", 0.15)),
buffer=int(total_tokens * alloc.get("buffer", 0.05)),
)
def adjust_budget(
self,
budget: TokenBudget,
context_type: ContextType | str,
adjustment: int,
) -> TokenBudget:
"""
Adjust a specific allocation in a budget.
Takes tokens from buffer and adds to specified type.
Args:
budget: Budget to adjust
context_type: Type to adjust
adjustment: Positive to increase, negative to decrease
Returns:
Adjusted budget
"""
if isinstance(context_type, ContextType):
context_type = context_type.value
# Calculate adjustment (limited by buffer)
if adjustment > 0:
# Taking from buffer
actual_adjustment = min(adjustment, budget.buffer)
budget.buffer -= actual_adjustment
else:
# Returning to buffer
actual_adjustment = adjustment
# Apply to target type
if context_type == "system":
budget.system = max(0, budget.system + actual_adjustment)
elif context_type == "task":
budget.task = max(0, budget.task + actual_adjustment)
elif context_type == "knowledge":
budget.knowledge = max(0, budget.knowledge + actual_adjustment)
elif context_type == "conversation":
budget.conversation = max(0, budget.conversation + actual_adjustment)
elif context_type == "tool":
budget.tools = max(0, budget.tools + actual_adjustment)
return budget
def rebalance_budget(
self,
budget: TokenBudget,
prioritize: list[ContextType] | None = None,
) -> TokenBudget:
"""
Rebalance budget based on actual usage.
Moves unused allocations to prioritized types.
Args:
budget: Budget to rebalance
prioritize: Types to prioritize (in order)
Returns:
Rebalanced budget
"""
if prioritize is None:
prioritize = [ContextType.KNOWLEDGE, ContextType.TASK, ContextType.SYSTEM]
# Calculate unused tokens per type
unused: dict[str, int] = {}
for ct in ContextType:
remaining = budget.remaining(ct)
if remaining > 0:
unused[ct.value] = remaining
# Calculate total reclaimable (excluding prioritized types)
prioritize_values = {ct.value for ct in prioritize}
reclaimable = sum(
tokens for ct, tokens in unused.items()
if ct not in prioritize_values
)
# Redistribute to prioritized types that are near capacity
for ct in prioritize:
ct_value = ct.value
utilization = budget.utilization(ct)
if utilization > 0.8: # Near capacity
# Give more tokens from reclaimable pool
bonus = min(reclaimable, budget.get_allocation(ct) // 2)
self.adjust_budget(budget, ct, bonus)
reclaimable -= bonus
if reclaimable <= 0:
break
return budget
def get_model_context_size(self, model: str) -> int:
"""
Get context window size for a model.
Args:
model: Model name
Returns:
Context window size in tokens
"""
# Common model context sizes
context_sizes = {
"claude-3-opus": 200000,
"claude-3-sonnet": 200000,
"claude-3-haiku": 200000,
"claude-3-5-sonnet": 200000,
"claude-3-5-haiku": 200000,
"claude-opus-4": 200000,
"gpt-4-turbo": 128000,
"gpt-4": 8192,
"gpt-4-32k": 32768,
"gpt-4o": 128000,
"gpt-4o-mini": 128000,
"gpt-3.5-turbo": 16385,
"gemini-1.5-pro": 2000000,
"gemini-1.5-flash": 1000000,
"gemini-2.0-flash": 1000000,
"qwen-plus": 32000,
"qwen-turbo": 8000,
"deepseek-chat": 64000,
"deepseek-reasoner": 64000,
}
# Check exact match first
model_lower = model.lower()
if model_lower in context_sizes:
return context_sizes[model_lower]
# Check prefix match
for model_name, size in context_sizes.items():
if model_lower.startswith(model_name):
return size
# Default fallback
return 8192
def create_budget_for_model(
self,
model: str,
custom_allocations: dict[str, float] | None = None,
) -> TokenBudget:
"""
Create a budget based on model's context window.
Args:
model: Model name
custom_allocations: Optional custom allocation percentages
Returns:
TokenBudget sized for the model
"""
context_size = self.get_model_context_size(model)
return self.create_budget(context_size, custom_allocations)