fast-next-template/backend/app/services/context/budget/allocator.py

"""
Token Budget Allocator for Context Management.

Manages token budget allocation across context types.
"""

from dataclasses import dataclass, field
from typing import Any

from ..config import ContextSettings, get_context_settings
from ..exceptions import BudgetExceededError
from ..types import ContextType


@dataclass
class TokenBudget:
    """
    Token budget allocation and tracking.

    Stores the token allocation for each context type (plus the response
    reserve and buffer) and tracks per-type usage so that allocations
    exceeding a type's budget can be rejected.
    """

    # Total budget
    total: int

    # Allocated per type
    system: int = 0
    task: int = 0
    knowledge: int = 0
    conversation: int = 0
    tools: int = 0
    memory: int = 0  # Agent memory (working, episodic, semantic, procedural)
    response_reserve: int = 0
    buffer: int = 0

    # Usage tracking (context type value -> tokens used)
    used: dict[str, int] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Initialize usage tracking with a zero counter per context type."""
        # Only seed the counters when the caller did not supply any usage.
        if not self.used:
            self.used = {ct.value: 0 for ct in ContextType}

    @staticmethod
    def _type_key(context_type: ContextType | str) -> str:
        """Return the string key used for allocation and usage lookups."""
        if isinstance(context_type, ContextType):
            return context_type.value
        return context_type

    def _usable(self) -> int:
        """Return the tokens left once response reserve and buffer are set aside."""
        return self.total - self.response_reserve - self.buffer

    def get_allocation(self, context_type: ContextType | str) -> int:
        """
        Get allocated tokens for a context type.

        Args:
            context_type: Context type to get allocation for

        Returns:
            Allocated token count, or 0 for a type without an allocation
        """
        # NOTE: the tools allocation is looked up under the singular key
        # "tool" (presumably the ContextType value - verify against types).
        allocation_map = {
            "system": self.system,
            "task": self.task,
            "knowledge": self.knowledge,
            "conversation": self.conversation,
            "tool": self.tools,
            "memory": self.memory,
        }
        return allocation_map.get(self._type_key(context_type), 0)

    def get_used(self, context_type: ContextType | str) -> int:
        """
        Get used tokens for a context type.

        Args:
            context_type: Context type to check

        Returns:
            Used token count, or 0 if nothing is tracked for the type
        """
        return self.used.get(self._type_key(context_type), 0)

    def remaining(self, context_type: ContextType | str) -> int:
        """
        Get remaining tokens for a context type.

        Args:
            context_type: Context type to check

        Returns:
            Remaining token count (never negative, even after forced
            over-allocation)
        """
        allocated = self.get_allocation(context_type)
        used = self.get_used(context_type)
        return max(0, allocated - used)

    def total_remaining(self) -> int:
        """
        Get total remaining tokens across all types.

        The response reserve and buffer are excluded from the usable total.

        Returns:
            Total remaining tokens (never negative)
        """
        return max(0, self._usable() - self.total_used())

    def total_used(self) -> int:
        """
        Get total used tokens.

        Returns:
            Sum of used tokens across all tracked types
        """
        return sum(self.used.values())

    def can_fit(self, context_type: ContextType | str, tokens: int) -> bool:
        """
        Check if tokens fit within budget for a type.

        Args:
            context_type: Context type to check
            tokens: Number of tokens to fit

        Returns:
            True if tokens fit within remaining budget
        """
        return tokens <= self.remaining(context_type)

    def allocate(
        self,
        context_type: ContextType | str,
        tokens: int,
        force: bool = False,
    ) -> bool:
        """
        Allocate (use) tokens from a context type's budget.

        Args:
            context_type: Context type to allocate from
            tokens: Number of tokens to allocate (must not be negative)
            force: If True, allow exceeding budget

        Returns:
            True if allocation succeeded

        Raises:
            ValueError: If tokens is negative
            BudgetExceededError: If tokens exceed budget and force=False
        """
        # A negative amount would silently act as an unclamped deallocation
        # and could drive the usage counter below zero.
        if tokens < 0:
            raise ValueError(f"tokens must be non-negative, got {tokens}")

        key = self._type_key(context_type)

        if not force and not self.can_fit(key, tokens):
            raise BudgetExceededError(
                message=f"Token budget exceeded for {key}",
                allocated=self.get_allocation(key),
                requested=self.get_used(key) + tokens,
                context_type=key,
            )

        self.used[key] = self.used.get(key, 0) + tokens
        return True

    def deallocate(
        self,
        context_type: ContextType | str,
        tokens: int,
    ) -> None:
        """
        Deallocate (return) tokens to a context type's budget.

        Usage is clamped at zero, so returning more than was used is safe.

        Args:
            context_type: Context type to return to
            tokens: Number of tokens to return (must not be negative)

        Raises:
            ValueError: If tokens is negative
        """
        # A negative amount would increase usage while bypassing the
        # budget check performed by allocate().
        if tokens < 0:
            raise ValueError(f"tokens must be non-negative, got {tokens}")

        key = self._type_key(context_type)
        self.used[key] = max(0, self.used.get(key, 0) - tokens)

    def reset(self) -> None:
        """Reset all usage tracking to zero for every context type."""
        self.used = {ct.value: 0 for ct in ContextType}

    def utilization(self, context_type: ContextType | str | None = None) -> float:
        """
        Get budget utilization as a fraction.

        Args:
            context_type: Specific type or None for total

        Returns:
            Utilization as a fraction (0.0 to 1.0+); 0.0 when the relevant
            budget is zero or negative
        """
        if context_type is None:
            usable = self._usable()
            if usable <= 0:
                return 0.0
            return self.total_used() / usable

        allocated = self.get_allocation(context_type)
        if allocated <= 0:
            return 0.0
        return self.get_used(context_type) / allocated

    def to_dict(self) -> dict[str, Any]:
        """Convert budget to a dictionary of allocations, usage and totals."""
        return {
            "total": self.total,
            "allocations": {
                "system": self.system,
                "task": self.task,
                "knowledge": self.knowledge,
                "conversation": self.conversation,
                "tools": self.tools,
                "memory": self.memory,
                "response_reserve": self.response_reserve,
                "buffer": self.buffer,
            },
            # Copy so callers cannot mutate the live counters.
            "used": dict(self.used),
            "remaining": {ct.value: self.remaining(ct) for ct in ContextType},
            "total_used": self.total_used(),
            "total_remaining": self.total_remaining(),
            "utilization": round(self.utilization(), 3),
        }


class BudgetAllocator:
    """
    Budget allocator for context management.

    Creates token budgets based on configuration and
    model context window sizes.
    """

    # Context type value -> name of the TokenBudget attribute holding its
    # allocation. The "tool" type is stored in the plural `tools` attribute.
    _ALLOCATION_FIELDS: dict[str, str] = {
        "system": "system",
        "task": "task",
        "knowledge": "knowledge",
        "conversation": "conversation",
        "tool": "tools",
        "memory": "memory",
    }

    # Utilization above which a prioritized type counts as near capacity.
    _NEAR_CAPACITY_UTILIZATION = 0.8

    # Common model context sizes
    _MODEL_CONTEXT_SIZES: dict[str, int] = {
        "claude-3-opus": 200000,
        "claude-3-sonnet": 200000,
        "claude-3-haiku": 200000,
        "claude-3-5-sonnet": 200000,
        "claude-3-5-haiku": 200000,
        "claude-opus-4": 200000,
        "gpt-4-turbo": 128000,
        "gpt-4": 8192,
        "gpt-4-32k": 32768,
        "gpt-4o": 128000,
        "gpt-4o-mini": 128000,
        "gpt-3.5-turbo": 16385,
        "gemini-1.5-pro": 2000000,
        "gemini-1.5-flash": 1000000,
        "gemini-2.0-flash": 1000000,
        "qwen-plus": 32000,
        "qwen-turbo": 8000,
        "deepseek-chat": 64000,
        "deepseek-reasoner": 64000,
    }

    # Context size used when the model is not recognized.
    _DEFAULT_CONTEXT_SIZE = 8192

    def __init__(self, settings: ContextSettings | None = None) -> None:
        """
        Initialize budget allocator.

        Args:
            settings: Context settings (uses default if None)
        """
        self._settings = settings or get_context_settings()

    def create_budget(
        self,
        total_tokens: int,
        custom_allocations: dict[str, float] | None = None,
    ) -> TokenBudget:
        """
        Create a token budget with allocations.

        Each allocation is the total multiplied by the fractional share for
        that key, truncated to an int. Keys missing from the chosen mapping
        fall back to the built-in default shares below.

        Args:
            total_tokens: Total available tokens
            custom_allocations: Optional custom allocation fractions; when
                empty or None the allocation from settings is used

        Returns:
            TokenBudget with allocations set
        """
        # Use custom or default allocations
        alloc = custom_allocations or self._settings.get_budget_allocation()

        def share(key: str, default: float) -> int:
            return int(total_tokens * alloc.get(key, default))

        return TokenBudget(
            total=total_tokens,
            system=share("system", 0.05),
            task=share("task", 0.10),
            knowledge=share("knowledge", 0.30),
            conversation=share("conversation", 0.15),
            tools=share("tools", 0.05),
            memory=share("memory", 0.15),
            # The reserve is configured under the "response" key.
            response_reserve=share("response", 0.15),
            buffer=share("buffer", 0.05),
        )

    def adjust_budget(
        self,
        budget: TokenBudget,
        context_type: ContextType | str,
        adjustment: int,
    ) -> TokenBudget:
        """
        Adjust a specific allocation in a budget.

        A positive adjustment takes tokens from the buffer and adds them to
        the specified type; a negative adjustment returns tokens from the
        type to the buffer. The budget is modified in place.

        Args:
            budget: Budget to adjust
            context_type: Type to adjust
            adjustment: Positive to increase, negative to decrease

        Returns:
            The same budget object, adjusted. Unknown context types leave
            the budget unchanged.
        """
        if isinstance(context_type, ContextType):
            context_type = context_type.value

        field_name = self._ALLOCATION_FIELDS.get(context_type)
        if field_name is None:
            # No allocation to credit: touching the buffer here would
            # silently lose tokens.
            return budget

        current_allocation = getattr(budget, field_name)

        if adjustment > 0:
            # Taking from buffer - limited by available (non-negative) buffer
            actual_adjustment = min(adjustment, max(0, budget.buffer))
        else:
            # Returning to buffer - can't return more than current allocation
            actual_adjustment = max(adjustment, -current_allocation)

        # Increases drain the buffer; decreases (negative values) refill it.
        budget.buffer -= actual_adjustment
        setattr(budget, field_name, max(0, current_allocation + actual_adjustment))

        return budget

    def rebalance_budget(
        self,
        budget: TokenBudget,
        prioritize: list[ContextType] | None = None,
    ) -> TokenBudget:
        """
        Rebalance budget based on actual usage.

        Moves unused allocations of non-prioritized types to prioritized
        types whose utilization is above the near-capacity threshold. Each
        such type receives at most half of its current allocation. The
        buffer is only used as a pass-through and ends up unchanged when it
        starts non-negative.

        Args:
            budget: Budget to rebalance (modified in place)
            prioritize: Types to prioritize (in order); defaults to
                knowledge, memory, task, system

        Returns:
            Rebalanced budget
        """
        if prioritize is None:
            prioritize = [
                ContextType.KNOWLEDGE,
                ContextType.MEMORY,
                ContextType.TASK,
                ContextType.SYSTEM,
            ]

        prioritize_values = {ct.value for ct in prioritize}

        # Unused tokens per non-prioritized type: the pool we may reclaim.
        donors: dict[str, int] = {}
        for ct in ContextType:
            if ct.value in prioritize_values:
                continue
            spare = budget.remaining(ct)
            if spare > 0:
                donors[ct.value] = spare

        reclaimable = sum(donors.values())

        # Redistribute to prioritized types that are near capacity
        for ct in prioritize:
            if reclaimable <= 0:
                break
            if budget.utilization(ct) <= self._NEAR_CAPACITY_UTILIZATION:
                continue

            bonus = min(reclaimable, budget.get_allocation(ct) // 2)
            if bonus <= 0:
                continue

            # Shrink donor allocations first (tokens land in the buffer) ...
            needed = bonus
            for donor, spare in donors.items():
                if needed <= 0:
                    break
                take = min(spare, needed)
                if take <= 0:
                    continue
                self.adjust_budget(budget, donor, -take)
                donors[donor] = spare - take
                needed -= take

            # ... then hand exactly that amount to the prioritized type.
            self.adjust_budget(budget, ct, bonus)
            reclaimable -= bonus

        return budget

    def get_model_context_size(self, model: str) -> int:
        """
        Get context window size for a model.

        Matching is case-insensitive. An exact name match wins; otherwise
        the longest known name that is a prefix of the model is used, so
        versioned names such as "gpt-4o-2024-08-06" resolve to "gpt-4o"
        rather than the shorter "gpt-4".

        Args:
            model: Model name

        Returns:
            Context window size in tokens (default fallback if unknown)
        """
        context_sizes = self._MODEL_CONTEXT_SIZES
        model_lower = model.lower()

        # Check exact match first
        if model_lower in context_sizes:
            return context_sizes[model_lower]

        # Longest prefix match: most specific known model family
        best_match = max(
            (name for name in context_sizes if model_lower.startswith(name)),
            key=len,
            default=None,
        )
        if best_match is not None:
            return context_sizes[best_match]

        # Default fallback
        return self._DEFAULT_CONTEXT_SIZE

    def create_budget_for_model(
        self,
        model: str,
        custom_allocations: dict[str, float] | None = None,
    ) -> TokenBudget:
        """
        Create a budget based on model's context window.

        Args:
            model: Model name
            custom_allocations: Optional custom allocation fractions

        Returns:
            TokenBudget sized for the model
        """
        context_size = self.get_model_context_size(model)
        return self.create_budget(context_size, custom_allocations)