feat(llm-gateway): implement LLM Gateway MCP Server (#56)

Implements complete LLM Gateway MCP Server with:
- FastMCP server with 4 tools: chat_completion, list_models, get_usage, count_tokens
- LiteLLM Router with multi-provider failover chains
- Circuit breaker pattern for fault tolerance
- Redis-based cost tracking per project/agent
- Comprehensive test suite (209 tests, 92% coverage)

Model groups defined per ADR-004:
- reasoning: claude-opus-4 → gpt-4.1 → gemini-2.5-pro
- code: claude-sonnet-4 → gpt-4.1 → deepseek-coder
- fast: claude-haiku → gpt-4.1-mini → gemini-2.0-flash

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-03 20:31:19 +01:00
parent 746fb7b181
commit 6e8b0b022a
23 changed files with 9794 additions and 93 deletions

View File

@@ -0,0 +1,478 @@
"""
Custom exceptions for LLM Gateway MCP Server.
Provides structured error handling with error codes for consistent responses.
"""
from enum import Enum
from typing import Any
class ErrorCode(str, Enum):
    """Machine-readable error codes attached to every gateway error.

    Inherits from ``str`` so codes serialize directly into JSON responses
    and compare equal to their string values.
    """

    # --- General / request-level failures ---
    UNKNOWN_ERROR = "LLM_UNKNOWN_ERROR"
    INVALID_REQUEST = "LLM_INVALID_REQUEST"
    CONFIGURATION_ERROR = "LLM_CONFIGURATION_ERROR"

    # --- Upstream provider failures ---
    PROVIDER_ERROR = "LLM_PROVIDER_ERROR"
    PROVIDER_TIMEOUT = "LLM_PROVIDER_TIMEOUT"
    PROVIDER_RATE_LIMIT = "LLM_PROVIDER_RATE_LIMIT"
    PROVIDER_UNAVAILABLE = "LLM_PROVIDER_UNAVAILABLE"
    ALL_PROVIDERS_FAILED = "LLM_ALL_PROVIDERS_FAILED"

    # --- Model / model-group resolution failures ---
    INVALID_MODEL = "LLM_INVALID_MODEL"
    INVALID_MODEL_GROUP = "LLM_INVALID_MODEL_GROUP"
    MODEL_NOT_AVAILABLE = "LLM_MODEL_NOT_AVAILABLE"

    # --- Circuit breaker states ---
    CIRCUIT_OPEN = "LLM_CIRCUIT_OPEN"
    CIRCUIT_HALF_OPEN_EXHAUSTED = "LLM_CIRCUIT_HALF_OPEN_EXHAUSTED"

    # --- Cost / budget enforcement ---
    COST_LIMIT_EXCEEDED = "LLM_COST_LIMIT_EXCEEDED"
    BUDGET_EXHAUSTED = "LLM_BUDGET_EXHAUSTED"

    # --- Gateway-side rate limiting ---
    RATE_LIMIT_EXCEEDED = "LLM_RATE_LIMIT_EXCEEDED"

    # --- Streaming failures ---
    STREAM_ERROR = "LLM_STREAM_ERROR"
    STREAM_INTERRUPTED = "LLM_STREAM_INTERRUPTED"

    # --- Token / context-window failures ---
    TOKEN_LIMIT_EXCEEDED = "LLM_TOKEN_LIMIT_EXCEEDED"
    CONTEXT_TOO_LONG = "LLM_CONTEXT_TOO_LONG"
class LLMGatewayError(Exception):
    """Base exception for LLM Gateway errors.

    Every gateway-specific exception derives from this class, carrying a
    machine-readable :class:`ErrorCode` plus an optional structured
    ``details`` dict so callers can build consistent JSON error responses.
    """

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.UNKNOWN_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize LLM Gateway error.

        Args:
            message: Human-readable error message
            code: Error code for programmatic handling
            details: Additional error details
            cause: Original exception that caused this error
        """
        super().__init__(message)
        self.message = message
        self.code = code
        self.details = details or {}
        self.cause = cause
        # Fix: also wire the original exception into Python's standard
        # chaining attribute so tracebacks and logging tools report
        # "The above exception was the direct cause of ..." even when the
        # raiser did not use `raise ... from cause`.
        if cause is not None:
            self.__cause__ = cause

    def to_dict(self) -> dict[str, Any]:
        """Convert error to dictionary for JSON response."""
        result: dict[str, Any] = {
            "error": self.code.value,
            "message": self.message,
        }
        # Omit the key entirely when empty to keep responses compact.
        if self.details:
            result["details"] = self.details
        return result

    def __str__(self) -> str:
        """String representation: ``[CODE] message``."""
        return f"[{self.code.value}] {self.message}"

    def __repr__(self) -> str:
        """Detailed representation for debugging."""
        return (
            f"{self.__class__.__name__}("
            f"message={self.message!r}, "
            f"code={self.code.value!r}, "
            f"details={self.details!r})"
        )
class ProviderError(LLMGatewayError):
    """Error from an LLM provider."""

    def __init__(
        self,
        message: str,
        provider: str,
        model: str | None = None,
        status_code: int | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize provider error.

        Args:
            message: Error message
            provider: Provider that failed
            model: Model that was being used
            status_code: HTTP status code if applicable
            details: Additional details
            cause: Original exception
        """
        # Fix: copy the caller's dict instead of aliasing it — the original
        # `details or {}` wrote provider/model/status keys back into the
        # dict the caller passed in.
        error_details = dict(details) if details else {}
        error_details["provider"] = provider
        if model is not None:
            error_details["model"] = model
        # `is not None` so a falsy status code is never silently dropped.
        if status_code is not None:
            error_details["status_code"] = status_code
        super().__init__(
            message=message,
            code=ErrorCode.PROVIDER_ERROR,
            details=error_details,
            cause=cause,
        )
        self.provider = provider
        self.model = model
        self.status_code = status_code
class RateLimitError(LLMGatewayError):
    """Rate limit exceeded error."""

    def __init__(
        self,
        message: str,
        provider: str | None = None,
        retry_after: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize rate limit error.

        Args:
            message: Error message
            provider: Provider that rate limited (None for internal limit)
            retry_after: Seconds until retry is allowed
            details: Additional details
        """
        # Fix: copy the caller's dict instead of mutating it in place.
        error_details = dict(details) if details else {}
        if provider:
            error_details["provider"] = provider
        # Fix: `is not None` so retry_after=0 ("retry immediately") is
        # preserved instead of being dropped by truthiness.
        if retry_after is not None:
            error_details["retry_after_seconds"] = retry_after
        # A provider-originated limit and the gateway's own internal limit
        # get distinct error codes.
        code = (
            ErrorCode.PROVIDER_RATE_LIMIT
            if provider
            else ErrorCode.RATE_LIMIT_EXCEEDED
        )
        super().__init__(
            message=message,
            code=code,
            details=error_details,
        )
        self.provider = provider
        self.retry_after = retry_after
class CircuitOpenError(LLMGatewayError):
    """Circuit breaker is open, provider temporarily unavailable."""

    def __init__(
        self,
        provider: str,
        recovery_time: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize circuit open error.

        Args:
            provider: Provider with open circuit
            recovery_time: Seconds until circuit may recover
            details: Additional details
        """
        # Fix: copy the caller's dict instead of mutating it in place.
        error_details = dict(details) if details else {}
        error_details["provider"] = provider
        # Fix: `is not None` so recovery_time=0 (recovery imminent) is
        # still reported instead of being dropped by truthiness.
        if recovery_time is not None:
            error_details["recovery_time_seconds"] = recovery_time
        super().__init__(
            message=f"Circuit breaker open for provider {provider}",
            code=ErrorCode.CIRCUIT_OPEN,
            details=error_details,
        )
        self.provider = provider
        self.recovery_time = recovery_time
class CostLimitExceededError(LLMGatewayError):
    """Cost limit exceeded for project or agent."""

    def __init__(
        self,
        entity_type: str,
        entity_id: str,
        current_cost: float,
        limit: float,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize cost limit error.

        Args:
            entity_type: 'project' or 'agent'
            entity_id: ID of the entity
            current_cost: Current accumulated cost
            limit: Cost limit that was exceeded
            details: Additional details
        """
        # Fix: copy the caller's dict instead of aliasing it — the original
        # `details or {}` mutated the dict the caller passed in.
        error_details = dict(details) if details else {}
        error_details["entity_type"] = entity_type
        error_details["entity_id"] = entity_id
        error_details["current_cost_usd"] = current_cost
        error_details["limit_usd"] = limit
        super().__init__(
            message=(
                f"Cost limit exceeded for {entity_type} {entity_id}: "
                f"${current_cost:.2f} >= ${limit:.2f}"
            ),
            code=ErrorCode.COST_LIMIT_EXCEEDED,
            details=error_details,
        )
        self.entity_type = entity_type
        self.entity_id = entity_id
        self.current_cost = current_cost
        self.limit = limit
class InvalidModelGroupError(LLMGatewayError):
    """Raised when a requested model-group name is not recognized."""

    def __init__(
        self,
        model_group: str,
        available_groups: list[str] | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            model_group: The invalid group name
            available_groups: List of valid group names
        """
        payload: dict[str, Any] = {"requested_group": model_group}
        if available_groups:
            # Include the valid names so clients can self-correct.
            payload["available_groups"] = available_groups
        super().__init__(
            message=f"Invalid model group: {model_group}",
            code=ErrorCode.INVALID_MODEL_GROUP,
            details=payload,
        )
        self.model_group = model_group
        self.available_groups = available_groups
class InvalidModelError(LLMGatewayError):
    """Raised when a requested model name is invalid or unknown."""

    def __init__(
        self,
        model: str,
        reason: str | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            model: The invalid model name
            reason: Reason why it's invalid
        """
        payload: dict[str, Any] = {"requested_model": model}
        if reason:
            payload["reason"] = reason
        # Append the reason in parentheses only when one was given.
        suffix = f" ({reason})" if reason else ""
        super().__init__(
            message=f"Invalid model: {model}{suffix}",
            code=ErrorCode.INVALID_MODEL,
            details=payload,
        )
        self.model = model
class ModelNotAvailableError(LLMGatewayError):
    """Raised when a model cannot be served because its provider is not configured."""

    def __init__(
        self,
        model: str,
        provider: str,
    ) -> None:
        """
        Create the error.

        Args:
            model: The unavailable model
            provider: The provider that's not configured
        """
        payload = {"model": model, "provider": provider}
        super().__init__(
            message=f"Model {model} not available: {provider} provider not configured",
            code=ErrorCode.MODEL_NOT_AVAILABLE,
            details=payload,
        )
        self.model = model
        self.provider = provider
class AllProvidersFailedError(LLMGatewayError):
    """Raised when every provider in the failover chain has failed."""

    def __init__(
        self,
        model_group: str,
        attempted_models: list[str],
        errors: list[dict[str, Any]],
    ) -> None:
        """
        Create the error.

        Args:
            model_group: The model group that was requested
            attempted_models: Models that were attempted
            errors: Errors from each attempt
        """
        # Carry the full attempt history so callers can see what failed
        # and why, per provider.
        payload = {
            "model_group": model_group,
            "attempted_models": attempted_models,
            "errors": errors,
        }
        super().__init__(
            message=f"All providers failed for model group {model_group}",
            code=ErrorCode.ALL_PROVIDERS_FAILED,
            details=payload,
        )
        self.model_group = model_group
        self.attempted_models = attempted_models
        self.errors = errors
class StreamError(LLMGatewayError):
    """Raised when a streaming response fails mid-flight."""

    def __init__(
        self,
        message: str,
        chunks_received: int = 0,
        cause: Exception | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            message: Error message
            chunks_received: Number of chunks received before error
            cause: Original exception
        """
        # Record how far the stream got so partial output can be reasoned about.
        payload = {"chunks_received": chunks_received}
        super().__init__(
            message=message,
            code=ErrorCode.STREAM_ERROR,
            details=payload,
            cause=cause,
        )
        self.chunks_received = chunks_received
class TokenLimitExceededError(LLMGatewayError):
    """Raised when a request exceeds the model's token limit."""

    def __init__(
        self,
        model: str,
        token_count: int,
        limit: int,
    ) -> None:
        """
        Create the error.

        Args:
            model: Model name
            token_count: Requested token count
            limit: Model's token limit
        """
        payload = {
            "model": model,
            "requested_tokens": token_count,
            "limit": limit,
        }
        super().__init__(
            message=f"Token count {token_count} exceeds {model} limit of {limit}",
            code=ErrorCode.TOKEN_LIMIT_EXCEEDED,
            details=payload,
        )
        self.model = model
        self.token_count = token_count
        self.limit = limit
class ContextTooLongError(LLMGatewayError):
    """Raised when the input context exceeds the model's context window."""

    def __init__(
        self,
        model: str,
        context_length: int,
        max_context: int,
    ) -> None:
        """
        Create the error.

        Args:
            model: Model name
            context_length: Input context length
            max_context: Model's max context window
        """
        payload = {
            "model": model,
            "context_length": context_length,
            "max_context": max_context,
        }
        message = (
            f"Context length {context_length} exceeds {model} "
            f"context window of {max_context}"
        )
        super().__init__(
            message=message,
            code=ErrorCode.CONTEXT_TOO_LONG,
            details=payload,
        )
        self.model = model
        self.context_length = context_length
        self.max_context = max_context
class ConfigurationError(LLMGatewayError):
    """Raised for invalid or missing gateway configuration."""

    def __init__(
        self,
        message: str,
        config_key: str | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            message: Error message
            config_key: Configuration key that's problematic
        """
        # Only attach details when a specific key was identified.
        payload: dict[str, Any] = (
            {"config_key": config_key} if config_key else {}
        )
        super().__init__(
            message=message,
            code=ErrorCode.CONFIGURATION_ERROR,
            details=payload,
        )
        self.config_key = config_key