Auto-fixed linting errors and formatting issues: - Removed unused imports (F401): pytest, Any, AnalysisType, MemoryType, OutcomeType - Removed unused variable (F841): hooks variable in test - Applied consistent formatting across memory service and test files 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
445 lines
14 KiB
Python
445 lines
14 KiB
Python
"""
|
|
Token Budget Allocator for Context Management.
|
|
|
|
Manages token budget allocation across context types.
|
|
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from ..config import ContextSettings, get_context_settings
|
|
from ..exceptions import BudgetExceededError
|
|
from ..types import ContextType
|
|
|
|
|
|
@dataclass
class TokenBudget:
    """
    Per-context-type token allocations together with a running usage tally.

    ``total`` is the overall budget. Each context type has its own
    allocation; ``response_reserve`` and ``buffer`` are set aside and are
    subtracted from ``total`` when computing the usable pool reported by
    ``total_remaining`` and by the overall ``utilization``.
    """

    # Overall token budget
    total: int

    # Allocation for each context type
    system: int = 0
    task: int = 0
    knowledge: int = 0
    conversation: int = 0
    tools: int = 0
    memory: int = 0  # Agent memory (working, episodic, semantic, procedural)
    response_reserve: int = 0
    buffer: int = 0

    # Tokens consumed so far, keyed by context type value
    used: dict[str, int] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Seed the usage table with zeros when no usage data was supplied."""
        if self.used:
            return
        self.used = dict.fromkeys((ct.value for ct in ContextType), 0)

    @staticmethod
    def _key(context_type: ContextType | str) -> str:
        """Return the string key for a type given as enum member or string."""
        if isinstance(context_type, ContextType):
            return context_type.value
        return context_type

    def get_allocation(self, context_type: ContextType | str) -> int:
        """
        Look up how many tokens are allocated to a context type.

        Args:
            context_type: Context type (enum member or its string value)

        Returns:
            The allocation for that type, or 0 when the type has no
            allocation field
        """
        key = self._key(context_type)
        # Note: the "tool" context type is backed by the ``tools`` field.
        by_type = {
            "system": self.system,
            "task": self.task,
            "knowledge": self.knowledge,
            "conversation": self.conversation,
            "tool": self.tools,
            "memory": self.memory,
        }
        if key in by_type:
            return by_type[key]
        return 0

    def get_used(self, context_type: ContextType | str) -> int:
        """
        Look up how many tokens a context type has consumed.

        Args:
            context_type: Context type (enum member or its string value)

        Returns:
            Tokens recorded as used for that type (0 if never recorded)
        """
        return self.used.get(self._key(context_type), 0)

    def remaining(self, context_type: ContextType | str) -> int:
        """
        Compute the unused part of a context type's allocation.

        Args:
            context_type: Context type (enum member or its string value)

        Returns:
            Allocation minus usage, floored at 0
        """
        headroom = self.get_allocation(context_type) - self.get_used(context_type)
        return max(0, headroom)

    def total_remaining(self) -> int:
        """
        Compute the unused part of the usable pool.

        Returns:
            ``total`` minus the response reserve, the buffer and all
            recorded usage, floored at 0
        """
        usable = self.total - self.response_reserve - self.buffer
        consumed = sum(self.used.values())
        return max(0, usable - consumed)

    def total_used(self) -> int:
        """
        Sum the usage recorded for every context type.

        Returns:
            Total tokens used
        """
        return sum(self.used.values())

    def can_fit(self, context_type: ContextType | str, tokens: int) -> bool:
        """
        Tell whether a request fits in a type's remaining allocation.

        Args:
            context_type: Context type (enum member or its string value)
            tokens: Number of tokens requested

        Returns:
            True when ``tokens`` does not exceed the remaining allocation
        """
        return self.remaining(context_type) >= tokens

    def allocate(
        self,
        context_type: ContextType | str,
        tokens: int,
        force: bool = False,
    ) -> bool:
        """
        Record tokens as used against a context type's allocation.

        Args:
            context_type: Context type to charge
            tokens: Number of tokens to record
            force: When True, record the usage even if it exceeds the
                allocation

        Returns:
            True once the usage has been recorded

        Raises:
            BudgetExceededError: If the request does not fit and
                ``force`` is False
        """
        key = self._key(context_type)

        if force or self.can_fit(key, tokens):
            self.used[key] = self.used.get(key, 0) + tokens
            return True

        raise BudgetExceededError(
            message=f"Token budget exceeded for {key}",
            allocated=self.get_allocation(key),
            requested=self.get_used(key) + tokens,
            context_type=key,
        )

    def deallocate(
        self,
        context_type: ContextType | str,
        tokens: int,
    ) -> None:
        """
        Give tokens back to a context type, lowering its recorded usage.

        Args:
            context_type: Context type to credit
            tokens: Number of tokens to give back
        """
        key = self._key(context_type)
        # Usage never drops below zero, even if more is returned than was used.
        self.used[key] = max(0, self.used.get(key, 0) - tokens)

    def reset(self) -> None:
        """Set the recorded usage of every context type back to zero."""
        self.used = dict.fromkeys((ct.value for ct in ContextType), 0)

    def utilization(self, context_type: ContextType | str | None = None) -> float:
        """
        Report how much of a budget has been consumed.

        Args:
            context_type: Type to report on, or None for the whole budget

        Returns:
            Used / available as a fraction (may exceed 1.0); 0.0 when
            nothing is available
        """
        if context_type is not None:
            allocated = self.get_allocation(context_type)
            if allocated > 0:
                return self.get_used(context_type) / allocated
            return 0.0

        usable = self.total - self.response_reserve - self.buffer
        if usable > 0:
            return self.total_used() / usable
        return 0.0

    def to_dict(self) -> dict[str, Any]:
        """Build a dictionary snapshot of allocations, usage and derived totals."""
        allocations = {
            "system": self.system,
            "task": self.task,
            "knowledge": self.knowledge,
            "conversation": self.conversation,
            "tools": self.tools,
            "memory": self.memory,
            "response_reserve": self.response_reserve,
            "buffer": self.buffer,
        }
        remaining_by_type = {ct.value: self.remaining(ct) for ct in ContextType}

        return {
            "total": self.total,
            "allocations": allocations,
            "used": dict(self.used),
            "remaining": remaining_by_type,
            "total_used": self.total_used(),
            "total_remaining": self.total_remaining(),
            "utilization": round(self.utilization(), 3),
        }
|
|
|
|
|
|
class BudgetAllocator:
    """
    Budget allocator for context management.

    Creates token budgets from configured allocation fractions and model
    context window sizes, and adjusts or rebalances existing budgets.
    """

    # Context type value -> name of the TokenBudget field holding its
    # allocation. The "tool" type is backed by the ``tools`` field.
    _ALLOCATION_ATTRS: dict[str, str] = {
        "system": "system",
        "task": "task",
        "knowledge": "knowledge",
        "conversation": "conversation",
        "tool": "tools",
        "memory": "memory",
    }

    # Known context window sizes, in tokens, keyed by lower-case model name.
    _MODEL_CONTEXT_SIZES: dict[str, int] = {
        "claude-3-opus": 200000,
        "claude-3-sonnet": 200000,
        "claude-3-haiku": 200000,
        "claude-3-5-sonnet": 200000,
        "claude-3-5-haiku": 200000,
        "claude-opus-4": 200000,
        "gpt-4-turbo": 128000,
        "gpt-4": 8192,
        "gpt-4-32k": 32768,
        "gpt-4o": 128000,
        "gpt-4o-mini": 128000,
        "gpt-3.5-turbo": 16385,
        "gemini-1.5-pro": 2000000,
        "gemini-1.5-flash": 1000000,
        "gemini-2.0-flash": 1000000,
        "qwen-plus": 32000,
        "qwen-turbo": 8000,
        "deepseek-chat": 64000,
        "deepseek-reasoner": 64000,
    }

    # Context window assumed for models that match no known name.
    _DEFAULT_CONTEXT_SIZE: int = 8192

    def __init__(self, settings: ContextSettings | None = None) -> None:
        """
        Initialize budget allocator.

        Args:
            settings: Context settings (``get_context_settings()`` is used
                when omitted)
        """
        self._settings = settings or get_context_settings()

    def create_budget(
        self,
        total_tokens: int,
        custom_allocations: dict[str, float] | None = None,
    ) -> TokenBudget:
        """
        Create a token budget with allocations.

        Each allocation is ``int(total_tokens * fraction)``. Fractions come
        from ``custom_allocations`` when it is non-empty, otherwise from the
        settings; keys missing from the mapping fall back to built-in
        defaults.

        Args:
            total_tokens: Total available tokens
            custom_allocations: Optional allocation fractions keyed by
                "system", "task", "knowledge", "conversation", "tools",
                "memory", "response" and "buffer"

        Returns:
            TokenBudget with allocations set
        """
        if custom_allocations:
            alloc = custom_allocations
        else:
            alloc = self._settings.get_budget_allocation()

        def share(key: str, default: float) -> int:
            # Truncate toward zero so the parts never round up past the total.
            return int(total_tokens * alloc.get(key, default))

        return TokenBudget(
            total=total_tokens,
            system=share("system", 0.05),
            task=share("task", 0.10),
            knowledge=share("knowledge", 0.30),
            conversation=share("conversation", 0.15),
            tools=share("tools", 0.05),
            memory=share("memory", 0.15),
            response_reserve=share("response", 0.15),
            buffer=share("buffer", 0.05),
        )

    def adjust_budget(
        self,
        budget: TokenBudget,
        context_type: ContextType | str,
        adjustment: int,
    ) -> TokenBudget:
        """
        Move tokens between the buffer and one context type's allocation.

        A positive adjustment takes tokens from the buffer (at most what the
        buffer holds); a negative adjustment returns tokens to the buffer
        (at most the type's current allocation). The budget is modified in
        place and also returned.

        Args:
            budget: Budget to adjust
            context_type: Type to adjust
            adjustment: Positive to increase, negative to decrease

        Returns:
            The adjusted budget. It is returned unchanged when
            ``context_type`` has no allocation field, so buffer tokens are
            never removed without being credited somewhere.
        """
        if isinstance(context_type, ContextType):
            context_type = context_type.value

        attr = self._ALLOCATION_ATTRS.get(context_type)
        if attr is None:
            # No field to credit or debit: leave the buffer untouched.
            return budget

        current = getattr(budget, attr)
        if adjustment > 0:
            # Taking from the buffer - limited by what the buffer holds.
            delta = min(adjustment, max(0, budget.buffer))
        else:
            # Returning to the buffer - limited by the current allocation.
            delta = max(adjustment, -max(0, current))

        # delta is negative when returning tokens, so this grows the buffer.
        budget.buffer -= delta
        setattr(budget, attr, max(0, current + delta))
        return budget

    def rebalance_budget(
        self,
        budget: TokenBudget,
        prioritize: list[ContextType] | None = None,
    ) -> TokenBudget:
        """
        Rebalance budget based on actual usage.

        Unused allocation of the non-prioritized types forms a reclaimable
        pool. Each prioritized type whose utilization is above 0.8 receives,
        in the given order, up to half of its current allocation from that
        pool. The granted tokens are removed from the donor types'
        allocations, so the sum of all allocations and the buffer are left
        unchanged. The budget is modified in place and also returned.

        Args:
            budget: Budget to rebalance
            prioritize: Types to prioritize (in order); defaults to
                knowledge, memory, task, system

        Returns:
            Rebalanced budget
        """
        if prioritize is None:
            prioritize = [
                ContextType.KNOWLEDGE,
                ContextType.MEMORY,
                ContextType.TASK,
                ContextType.SYSTEM,
            ]

        prioritized_values = {ct.value for ct in prioritize}

        # Unused tokens each non-prioritized type can give away.
        donors: dict[str, int] = {}
        for ct in ContextType:
            if ct.value in prioritized_values:
                continue
            if ct.value not in self._ALLOCATION_ATTRS:
                continue
            spare = budget.remaining(ct)
            if spare > 0:
                donors[ct.value] = spare

        reclaimable = sum(donors.values())

        for ct in prioritize:
            if reclaimable <= 0:
                break
            if budget.utilization(ct) <= 0.8:
                # Not near capacity: no extra tokens needed.
                continue

            target_attr = self._ALLOCATION_ATTRS.get(ct.value)
            if target_attr is None:
                continue

            bonus = min(reclaimable, budget.get_allocation(ct) // 2)
            if bonus <= 0:
                continue

            self._take_from_donors(budget, donors, bonus)
            setattr(budget, target_attr, getattr(budget, target_attr) + bonus)
            reclaimable -= bonus

        return budget

    def _take_from_donors(
        self,
        budget: TokenBudget,
        donors: dict[str, int],
        amount: int,
    ) -> None:
        """Remove ``amount`` tokens from donor allocations, updating ``donors``."""
        for key in donors:
            if amount <= 0:
                break
            taken = min(donors[key], amount)
            attr = self._ALLOCATION_ATTRS[key]
            # taken never exceeds the donor's unused tokens, so its
            # allocation stays at or above what it has already used.
            setattr(budget, attr, getattr(budget, attr) - taken)
            donors[key] -= taken
            amount -= taken

    def get_model_context_size(self, model: str) -> int:
        """
        Get context window size for a model.

        An exact (case-insensitive) name match wins. Otherwise the longest
        known name that is a prefix of ``model`` is used, so a versioned
        name such as "gpt-4o-2024-08-06" resolves to "gpt-4o" rather than
        to the shorter "gpt-4".

        Args:
            model: Model name

        Returns:
            Context window size in tokens; 8192 when the model is unknown
        """
        name = model.lower()
        sizes = self._MODEL_CONTEXT_SIZES

        if name in sizes:
            return sizes[name]

        prefixes = [known for known in sizes if name.startswith(known)]
        if prefixes:
            return sizes[max(prefixes, key=len)]

        return self._DEFAULT_CONTEXT_SIZE

    def create_budget_for_model(
        self,
        model: str,
        custom_allocations: dict[str, float] | None = None,
    ) -> TokenBudget:
        """
        Create a budget based on model's context window.

        Args:
            model: Model name
            custom_allocations: Optional custom allocation fractions

        Returns:
            TokenBudget sized for the model
        """
        context_size = self.get_model_context_size(model)
        return self.create_budget(context_size, custom_allocations)
|