feat(llm-gateway): implement LLM Gateway MCP Server (#56)

Implements complete LLM Gateway MCP Server with:
- FastMCP server with 4 tools: chat_completion, list_models, get_usage, count_tokens
- LiteLLM Router with multi-provider failover chains
- Circuit breaker pattern for fault tolerance
- Redis-based cost tracking per project/agent
- Comprehensive test suite (209 tests, 92% coverage)

Model groups defined per ADR-004:
- reasoning: claude-opus-4 → gpt-4.1 → gemini-2.5-pro
- code: claude-sonnet-4 → gpt-4.1 → deepseek-coder
- fast: claude-haiku → gpt-4.1-mini → gemini-2.0-flash

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-03 20:31:19 +01:00
parent 746fb7b181
commit 6e8b0b022a
23 changed files with 9794 additions and 93 deletions

View File

@@ -0,0 +1,478 @@
"""
Custom exceptions for LLM Gateway MCP Server.
Provides structured error handling with error codes for consistent responses.
"""
from enum import Enum
from typing import Any
class ErrorCode(str, Enum):
    """Machine-readable error codes attached to every gateway error.

    Inherits from ``str`` so codes serialize directly into JSON responses
    and compare equal to their string values.
    """

    # --- General / request-level failures ---
    UNKNOWN_ERROR = "LLM_UNKNOWN_ERROR"
    INVALID_REQUEST = "LLM_INVALID_REQUEST"
    CONFIGURATION_ERROR = "LLM_CONFIGURATION_ERROR"

    # --- Upstream provider failures ---
    PROVIDER_ERROR = "LLM_PROVIDER_ERROR"
    PROVIDER_TIMEOUT = "LLM_PROVIDER_TIMEOUT"
    PROVIDER_RATE_LIMIT = "LLM_PROVIDER_RATE_LIMIT"
    PROVIDER_UNAVAILABLE = "LLM_PROVIDER_UNAVAILABLE"
    ALL_PROVIDERS_FAILED = "LLM_ALL_PROVIDERS_FAILED"

    # --- Model / model-group resolution failures ---
    INVALID_MODEL = "LLM_INVALID_MODEL"
    INVALID_MODEL_GROUP = "LLM_INVALID_MODEL_GROUP"
    MODEL_NOT_AVAILABLE = "LLM_MODEL_NOT_AVAILABLE"

    # --- Circuit breaker states ---
    CIRCUIT_OPEN = "LLM_CIRCUIT_OPEN"
    CIRCUIT_HALF_OPEN_EXHAUSTED = "LLM_CIRCUIT_HALF_OPEN_EXHAUSTED"

    # --- Cost / budget enforcement ---
    COST_LIMIT_EXCEEDED = "LLM_COST_LIMIT_EXCEEDED"
    BUDGET_EXHAUSTED = "LLM_BUDGET_EXHAUSTED"

    # --- Gateway-side rate limiting ---
    RATE_LIMIT_EXCEEDED = "LLM_RATE_LIMIT_EXCEEDED"

    # --- Streaming failures ---
    STREAM_ERROR = "LLM_STREAM_ERROR"
    STREAM_INTERRUPTED = "LLM_STREAM_INTERRUPTED"

    # --- Token / context-window failures ---
    TOKEN_LIMIT_EXCEEDED = "LLM_TOKEN_LIMIT_EXCEEDED"
    CONTEXT_TOO_LONG = "LLM_CONTEXT_TOO_LONG"
class LLMGatewayError(Exception):
    """Base exception for LLM Gateway errors.

    Every gateway-specific exception derives from this class, carrying a
    machine-readable :class:`ErrorCode` plus an optional structured
    ``details`` dict so callers can build consistent JSON error responses.
    """

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.UNKNOWN_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize LLM Gateway error.

        Args:
            message: Human-readable error message
            code: Error code for programmatic handling
            details: Additional error details
            cause: Original exception that caused this error
        """
        super().__init__(message)
        self.message = message
        self.code = code
        self.details = details or {}
        self.cause = cause
        # Fix: also wire the original exception into Python's standard
        # chaining attribute so tracebacks and logging tools report
        # "The above exception was the direct cause of ..." even when the
        # raiser did not use `raise ... from cause`.
        if cause is not None:
            self.__cause__ = cause

    def to_dict(self) -> dict[str, Any]:
        """Convert error to dictionary for JSON response."""
        result: dict[str, Any] = {
            "error": self.code.value,
            "message": self.message,
        }
        # Omit the key entirely when empty to keep responses compact.
        if self.details:
            result["details"] = self.details
        return result

    def __str__(self) -> str:
        """String representation: ``[CODE] message``."""
        return f"[{self.code.value}] {self.message}"

    def __repr__(self) -> str:
        """Detailed representation for debugging."""
        return (
            f"{self.__class__.__name__}("
            f"message={self.message!r}, "
            f"code={self.code.value!r}, "
            f"details={self.details!r})"
        )
class ProviderError(LLMGatewayError):
    """Error from an LLM provider."""

    def __init__(
        self,
        message: str,
        provider: str,
        model: str | None = None,
        status_code: int | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize provider error.

        Args:
            message: Error message
            provider: Provider that failed
            model: Model that was being used
            status_code: HTTP status code if applicable
            details: Additional details
            cause: Original exception
        """
        # Fix: copy the caller's dict instead of aliasing it — the original
        # `details or {}` wrote provider/model/status keys back into the
        # dict the caller passed in.
        error_details = dict(details) if details else {}
        error_details["provider"] = provider
        if model is not None:
            error_details["model"] = model
        # `is not None` so a falsy status code is never silently dropped.
        if status_code is not None:
            error_details["status_code"] = status_code
        super().__init__(
            message=message,
            code=ErrorCode.PROVIDER_ERROR,
            details=error_details,
            cause=cause,
        )
        self.provider = provider
        self.model = model
        self.status_code = status_code
class RateLimitError(LLMGatewayError):
    """Rate limit exceeded error."""

    def __init__(
        self,
        message: str,
        provider: str | None = None,
        retry_after: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize rate limit error.

        Args:
            message: Error message
            provider: Provider that rate limited (None for internal limit)
            retry_after: Seconds until retry is allowed
            details: Additional details
        """
        # Fix: copy the caller's dict instead of mutating it in place.
        error_details = dict(details) if details else {}
        if provider:
            error_details["provider"] = provider
        # Fix: `is not None` so retry_after=0 ("retry immediately") is
        # preserved instead of being dropped by truthiness.
        if retry_after is not None:
            error_details["retry_after_seconds"] = retry_after
        # A provider-originated limit and the gateway's own internal limit
        # get distinct error codes.
        code = (
            ErrorCode.PROVIDER_RATE_LIMIT
            if provider
            else ErrorCode.RATE_LIMIT_EXCEEDED
        )
        super().__init__(
            message=message,
            code=code,
            details=error_details,
        )
        self.provider = provider
        self.retry_after = retry_after
class CircuitOpenError(LLMGatewayError):
    """Circuit breaker is open, provider temporarily unavailable."""

    def __init__(
        self,
        provider: str,
        recovery_time: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize circuit open error.

        Args:
            provider: Provider with open circuit
            recovery_time: Seconds until circuit may recover
            details: Additional details
        """
        # Fix: copy the caller's dict instead of mutating it in place.
        error_details = dict(details) if details else {}
        error_details["provider"] = provider
        # Fix: `is not None` so recovery_time=0 (recovery imminent) is
        # still reported instead of being dropped by truthiness.
        if recovery_time is not None:
            error_details["recovery_time_seconds"] = recovery_time
        super().__init__(
            message=f"Circuit breaker open for provider {provider}",
            code=ErrorCode.CIRCUIT_OPEN,
            details=error_details,
        )
        self.provider = provider
        self.recovery_time = recovery_time
class CostLimitExceededError(LLMGatewayError):
    """Cost limit exceeded for project or agent."""

    def __init__(
        self,
        entity_type: str,
        entity_id: str,
        current_cost: float,
        limit: float,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize cost limit error.

        Args:
            entity_type: 'project' or 'agent'
            entity_id: ID of the entity
            current_cost: Current accumulated cost
            limit: Cost limit that was exceeded
            details: Additional details
        """
        # Fix: copy the caller's dict instead of aliasing it — the original
        # `details or {}` mutated the dict the caller passed in.
        error_details = dict(details) if details else {}
        error_details["entity_type"] = entity_type
        error_details["entity_id"] = entity_id
        error_details["current_cost_usd"] = current_cost
        error_details["limit_usd"] = limit
        super().__init__(
            message=(
                f"Cost limit exceeded for {entity_type} {entity_id}: "
                f"${current_cost:.2f} >= ${limit:.2f}"
            ),
            code=ErrorCode.COST_LIMIT_EXCEEDED,
            details=error_details,
        )
        self.entity_type = entity_type
        self.entity_id = entity_id
        self.current_cost = current_cost
        self.limit = limit
class InvalidModelGroupError(LLMGatewayError):
    """Raised when a requested model-group name is not recognized."""

    def __init__(
        self,
        model_group: str,
        available_groups: list[str] | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            model_group: The invalid group name
            available_groups: List of valid group names
        """
        payload: dict[str, Any] = {"requested_group": model_group}
        if available_groups:
            # Include the valid names so clients can self-correct.
            payload["available_groups"] = available_groups
        super().__init__(
            message=f"Invalid model group: {model_group}",
            code=ErrorCode.INVALID_MODEL_GROUP,
            details=payload,
        )
        self.model_group = model_group
        self.available_groups = available_groups
class InvalidModelError(LLMGatewayError):
    """Raised when a requested model name is invalid or unknown."""

    def __init__(
        self,
        model: str,
        reason: str | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            model: The invalid model name
            reason: Reason why it's invalid
        """
        payload: dict[str, Any] = {"requested_model": model}
        if reason:
            payload["reason"] = reason
        # Append the reason in parentheses only when one was given.
        suffix = f" ({reason})" if reason else ""
        super().__init__(
            message=f"Invalid model: {model}{suffix}",
            code=ErrorCode.INVALID_MODEL,
            details=payload,
        )
        self.model = model
class ModelNotAvailableError(LLMGatewayError):
    """Raised when a model cannot be served because its provider is not configured."""

    def __init__(
        self,
        model: str,
        provider: str,
    ) -> None:
        """
        Create the error.

        Args:
            model: The unavailable model
            provider: The provider that's not configured
        """
        payload = {"model": model, "provider": provider}
        super().__init__(
            message=f"Model {model} not available: {provider} provider not configured",
            code=ErrorCode.MODEL_NOT_AVAILABLE,
            details=payload,
        )
        self.model = model
        self.provider = provider
class AllProvidersFailedError(LLMGatewayError):
    """Raised when every provider in the failover chain has failed."""

    def __init__(
        self,
        model_group: str,
        attempted_models: list[str],
        errors: list[dict[str, Any]],
    ) -> None:
        """
        Create the error.

        Args:
            model_group: The model group that was requested
            attempted_models: Models that were attempted
            errors: Errors from each attempt
        """
        # Carry the full attempt history so callers can see what failed
        # and why, per provider.
        payload = {
            "model_group": model_group,
            "attempted_models": attempted_models,
            "errors": errors,
        }
        super().__init__(
            message=f"All providers failed for model group {model_group}",
            code=ErrorCode.ALL_PROVIDERS_FAILED,
            details=payload,
        )
        self.model_group = model_group
        self.attempted_models = attempted_models
        self.errors = errors
class StreamError(LLMGatewayError):
    """Raised when a streaming response fails mid-flight."""

    def __init__(
        self,
        message: str,
        chunks_received: int = 0,
        cause: Exception | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            message: Error message
            chunks_received: Number of chunks received before error
            cause: Original exception
        """
        # Record how far the stream got so partial output can be reasoned about.
        payload = {"chunks_received": chunks_received}
        super().__init__(
            message=message,
            code=ErrorCode.STREAM_ERROR,
            details=payload,
            cause=cause,
        )
        self.chunks_received = chunks_received
class TokenLimitExceededError(LLMGatewayError):
    """Raised when a request exceeds the model's token limit."""

    def __init__(
        self,
        model: str,
        token_count: int,
        limit: int,
    ) -> None:
        """
        Create the error.

        Args:
            model: Model name
            token_count: Requested token count
            limit: Model's token limit
        """
        payload = {
            "model": model,
            "requested_tokens": token_count,
            "limit": limit,
        }
        super().__init__(
            message=f"Token count {token_count} exceeds {model} limit of {limit}",
            code=ErrorCode.TOKEN_LIMIT_EXCEEDED,
            details=payload,
        )
        self.model = model
        self.token_count = token_count
        self.limit = limit
class ContextTooLongError(LLMGatewayError):
    """Raised when the input context exceeds the model's context window."""

    def __init__(
        self,
        model: str,
        context_length: int,
        max_context: int,
    ) -> None:
        """
        Create the error.

        Args:
            model: Model name
            context_length: Input context length
            max_context: Model's max context window
        """
        payload = {
            "model": model,
            "context_length": context_length,
            "max_context": max_context,
        }
        message = (
            f"Context length {context_length} exceeds {model} "
            f"context window of {max_context}"
        )
        super().__init__(
            message=message,
            code=ErrorCode.CONTEXT_TOO_LONG,
            details=payload,
        )
        self.model = model
        self.context_length = context_length
        self.max_context = max_context
class ConfigurationError(LLMGatewayError):
    """Raised for invalid or missing gateway configuration."""

    def __init__(
        self,
        message: str,
        config_key: str | None = None,
    ) -> None:
        """
        Create the error.

        Args:
            message: Error message
            config_key: Configuration key that's problematic
        """
        # Only attach details when a specific key was identified.
        payload: dict[str, Any] = (
            {"config_key": config_key} if config_key else {}
        )
        super().__init__(
            message=message,
            code=ErrorCode.CONFIGURATION_ERROR,
            details=payload,
        )
        self.config_key = config_key