feat(llm-gateway): implement LLM Gateway MCP Server (#56)
Implements the complete LLM Gateway MCP Server with: a FastMCP server exposing 4 tools (chat_completion, list_models, get_usage, count_tokens); a LiteLLM Router with multi-provider failover chains; a circuit-breaker pattern for fault tolerance; Redis-based cost tracking per project/agent; and a comprehensive test suite (209 tests, 92% coverage). Model groups defined per ADR-004: reasoning: claude-opus-4 → gpt-4.1 → gemini-2.5-pro; code: claude-sonnet-4 → gpt-4.1 → deepseek-coder; fast: claude-haiku → gpt-4.1-mini → gemini-2.0-flash. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
478
mcp-servers/llm-gateway/exceptions.py
Normal file
478
mcp-servers/llm-gateway/exceptions.py
Normal file
@@ -0,0 +1,478 @@
|
||||
"""
|
||||
Custom exceptions for LLM Gateway MCP Server.
|
||||
|
||||
Provides structured error handling with error codes for consistent responses.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
|
||||
class ErrorCode(str, Enum):
    """Error codes for LLM Gateway errors.

    Subclasses ``str`` so members compare equal to their string values and
    serialize directly into JSON responses (see ``LLMGatewayError.to_dict``).
    Every code is prefixed with ``LLM_`` to namespace it.
    """

    # General errors
    UNKNOWN_ERROR = "LLM_UNKNOWN_ERROR"
    INVALID_REQUEST = "LLM_INVALID_REQUEST"
    CONFIGURATION_ERROR = "LLM_CONFIGURATION_ERROR"

    # Provider errors (failures of an upstream LLM provider)
    PROVIDER_ERROR = "LLM_PROVIDER_ERROR"
    PROVIDER_TIMEOUT = "LLM_PROVIDER_TIMEOUT"
    PROVIDER_RATE_LIMIT = "LLM_PROVIDER_RATE_LIMIT"
    PROVIDER_UNAVAILABLE = "LLM_PROVIDER_UNAVAILABLE"
    ALL_PROVIDERS_FAILED = "LLM_ALL_PROVIDERS_FAILED"

    # Model errors (unknown model/group, or provider not configured)
    INVALID_MODEL = "LLM_INVALID_MODEL"
    INVALID_MODEL_GROUP = "LLM_INVALID_MODEL_GROUP"
    MODEL_NOT_AVAILABLE = "LLM_MODEL_NOT_AVAILABLE"

    # Circuit breaker errors
    CIRCUIT_OPEN = "LLM_CIRCUIT_OPEN"
    CIRCUIT_HALF_OPEN_EXHAUSTED = "LLM_CIRCUIT_HALF_OPEN_EXHAUSTED"

    # Cost errors
    COST_LIMIT_EXCEEDED = "LLM_COST_LIMIT_EXCEEDED"
    BUDGET_EXHAUSTED = "LLM_BUDGET_EXHAUSTED"

    # Rate limiting errors (gateway-internal limit, not a provider's)
    RATE_LIMIT_EXCEEDED = "LLM_RATE_LIMIT_EXCEEDED"

    # Streaming errors
    STREAM_ERROR = "LLM_STREAM_ERROR"
    STREAM_INTERRUPTED = "LLM_STREAM_INTERRUPTED"

    # Token errors
    TOKEN_LIMIT_EXCEEDED = "LLM_TOKEN_LIMIT_EXCEEDED"
    CONTEXT_TOO_LONG = "LLM_CONTEXT_TOO_LONG"
class LLMGatewayError(Exception):
    """Base exception for LLM Gateway errors.

    Carries a machine-readable :class:`ErrorCode`, optional structured
    details, and the optional original exception that triggered it.
    """

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.UNKNOWN_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize LLM Gateway error.

        Args:
            message: Human-readable error message
            code: Error code for programmatic handling
            details: Additional error details
            cause: Original exception that caused this error
        """
        super().__init__(message)
        self.message = message
        self.code = code
        self.details = details if details else {}
        self.cause = cause

    def to_dict(self) -> dict[str, Any]:
        """Convert error to dictionary for JSON response."""
        payload: dict[str, Any] = {
            "error": self.code.value,
            "message": self.message,
        }
        # Only attach details when there is something to report.
        if not self.details:
            return payload
        payload["details"] = self.details
        return payload

    def __str__(self) -> str:
        """String representation: ``[CODE] message``."""
        return f"[{self.code.value}] {self.message}"

    def __repr__(self) -> str:
        """Detailed representation for debugging."""
        cls_name = type(self).__name__
        return (
            f"{cls_name}(message={self.message!r}, "
            f"code={self.code.value!r}, "
            f"details={self.details!r})"
        )
class ProviderError(LLMGatewayError):
    """Error from an LLM provider.

    Raised when a single upstream provider call fails; records the
    provider name (and optionally model / HTTP status) in ``details``.
    """

    def __init__(
        self,
        message: str,
        provider: str,
        model: str | None = None,
        status_code: int | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize provider error.

        Args:
            message: Error message
            provider: Provider that failed
            model: Model that was being used
            status_code: HTTP status code if applicable
            details: Additional details
            cause: Original exception

        """
        # Copy instead of aliasing: the previous `details or {}` mutated
        # the caller's dictionary by adding keys to it in place.
        error_details = dict(details) if details else {}
        error_details["provider"] = provider
        if model:
            error_details["model"] = model
        if status_code:
            error_details["status_code"] = status_code

        super().__init__(
            message=message,
            code=ErrorCode.PROVIDER_ERROR,
            details=error_details,
            cause=cause,
        )
        self.provider = provider
        self.model = model
        self.status_code = status_code
class RateLimitError(LLMGatewayError):
    """Rate limit exceeded error.

    Covers both a provider-side throttle (``provider`` given) and the
    gateway's own internal rate limit (``provider`` is None); the error
    code reflects which one occurred.
    """

    def __init__(
        self,
        message: str,
        provider: str | None = None,
        retry_after: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize rate limit error.

        Args:
            message: Error message
            provider: Provider that rate limited (None for internal limit)
            retry_after: Seconds until retry is allowed
            details: Additional details

        """
        # Copy instead of aliasing so the caller's dict is never mutated.
        error_details = dict(details) if details else {}
        if provider:
            error_details["provider"] = provider
        # `is not None` so retry_after=0 ("retry immediately") is preserved;
        # the previous truthiness test silently dropped it.
        if retry_after is not None:
            error_details["retry_after_seconds"] = retry_after

        # Provider-side throttle vs. gateway-internal limit.
        code = (
            ErrorCode.PROVIDER_RATE_LIMIT
            if provider
            else ErrorCode.RATE_LIMIT_EXCEEDED
        )

        super().__init__(
            message=message,
            code=code,
            details=error_details,
        )
        self.provider = provider
        self.retry_after = retry_after
class CircuitOpenError(LLMGatewayError):
    """Circuit breaker is open, provider temporarily unavailable."""

    def __init__(
        self,
        provider: str,
        recovery_time: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize circuit open error.

        Args:
            provider: Provider with open circuit
            recovery_time: Seconds until circuit may recover
            details: Additional details

        """
        # Copy instead of aliasing so the caller's dict is never mutated.
        error_details = dict(details) if details else {}
        error_details["provider"] = provider
        # `is not None` so recovery_time=0 ("may recover now") is preserved;
        # the previous truthiness test silently dropped it.
        if recovery_time is not None:
            error_details["recovery_time_seconds"] = recovery_time

        super().__init__(
            message=f"Circuit breaker open for provider {provider}",
            code=ErrorCode.CIRCUIT_OPEN,
            details=error_details,
        )
        self.provider = provider
        self.recovery_time = recovery_time
class CostLimitExceededError(LLMGatewayError):
    """Cost limit exceeded for project or agent."""

    def __init__(
        self,
        entity_type: str,
        entity_id: str,
        current_cost: float,
        limit: float,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize cost limit error.

        Args:
            entity_type: 'project' or 'agent'
            entity_id: ID of the entity
            current_cost: Current accumulated cost
            limit: Cost limit that was exceeded
            details: Additional details

        """
        # Copy instead of aliasing: the previous `details or {}` mutated
        # the caller's dictionary by adding keys to it in place.
        error_details = dict(details) if details else {}
        error_details["entity_type"] = entity_type
        error_details["entity_id"] = entity_id
        error_details["current_cost_usd"] = current_cost
        error_details["limit_usd"] = limit

        super().__init__(
            message=(
                f"Cost limit exceeded for {entity_type} {entity_id}: "
                f"${current_cost:.2f} >= ${limit:.2f}"
            ),
            code=ErrorCode.COST_LIMIT_EXCEEDED,
            details=error_details,
        )
        self.entity_type = entity_type
        self.entity_id = entity_id
        self.current_cost = current_cost
        self.limit = limit
class InvalidModelGroupError(LLMGatewayError):
    """Invalid or unknown model group."""

    def __init__(
        self,
        model_group: str,
        available_groups: list[str] | None = None,
    ) -> None:
        """
        Initialize invalid model group error.

        Args:
            model_group: The invalid group name
            available_groups: List of valid group names
        """
        info: dict[str, Any] = {"requested_group": model_group}
        # Include the valid choices when the caller supplied them.
        if available_groups:
            info["available_groups"] = available_groups

        super().__init__(
            message=f"Invalid model group: {model_group}",
            code=ErrorCode.INVALID_MODEL_GROUP,
            details=info,
        )
        self.model_group = model_group
        self.available_groups = available_groups
class InvalidModelError(LLMGatewayError):
    """Invalid or unknown model."""

    def __init__(
        self,
        model: str,
        reason: str | None = None,
    ) -> None:
        """
        Initialize invalid model error.

        Args:
            model: The invalid model name
            reason: Reason why it's invalid
        """
        info: dict[str, Any] = {"requested_model": model}
        if reason:
            info["reason"] = reason

        # Append the reason, parenthesized, only when one was given.
        text = f"Invalid model: {model}"
        if reason:
            text += f" ({reason})"

        super().__init__(
            message=text,
            code=ErrorCode.INVALID_MODEL,
            details=info,
        )
        self.model = model
class ModelNotAvailableError(LLMGatewayError):
    """Model not available (provider not configured)."""

    def __init__(
        self,
        model: str,
        provider: str,
    ) -> None:
        """
        Initialize model not available error.

        Args:
            model: The unavailable model
            provider: The provider that's not configured
        """
        info = {"model": model, "provider": provider}
        super().__init__(
            message=f"Model {model} not available: {provider} provider not configured",
            code=ErrorCode.MODEL_NOT_AVAILABLE,
            details=info,
        )
        self.model = model
        self.provider = provider
class AllProvidersFailedError(LLMGatewayError):
    """All providers in the failover chain failed."""

    def __init__(
        self,
        model_group: str,
        attempted_models: list[str],
        errors: list[dict[str, Any]],
    ) -> None:
        """
        Initialize all providers failed error.

        Args:
            model_group: The model group that was requested
            attempted_models: Models that were attempted
            errors: Errors from each attempt
        """
        # Record the whole failover trail so the response explains
        # exactly which models were tried and how each one failed.
        failure_info: dict[str, Any] = {
            "model_group": model_group,
            "attempted_models": attempted_models,
            "errors": errors,
        }
        super().__init__(
            message=f"All providers failed for model group {model_group}",
            code=ErrorCode.ALL_PROVIDERS_FAILED,
            details=failure_info,
        )
        self.model_group = model_group
        self.attempted_models = attempted_models
        self.errors = errors
class StreamError(LLMGatewayError):
    """Error during streaming response."""

    def __init__(
        self,
        message: str,
        chunks_received: int = 0,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize stream error.

        Args:
            message: Error message
            chunks_received: Number of chunks received before error
            cause: Original exception
        """
        # How far the stream got before failing — useful for deciding
        # whether partial output was already delivered.
        progress = {"chunks_received": chunks_received}
        super().__init__(
            message=message,
            code=ErrorCode.STREAM_ERROR,
            details=progress,
            cause=cause,
        )
        self.chunks_received = chunks_received
class TokenLimitExceededError(LLMGatewayError):
    """Request exceeds model's token limit."""

    def __init__(
        self,
        model: str,
        token_count: int,
        limit: int,
    ) -> None:
        """
        Initialize token limit error.

        Args:
            model: Model name
            token_count: Requested token count
            limit: Model's token limit
        """
        info: dict[str, Any] = {
            "model": model,
            "requested_tokens": token_count,
            "limit": limit,
        }
        super().__init__(
            message=f"Token count {token_count} exceeds {model} limit of {limit}",
            code=ErrorCode.TOKEN_LIMIT_EXCEEDED,
            details=info,
        )
        self.model = model
        self.token_count = token_count
        self.limit = limit
||||
class ContextTooLongError(LLMGatewayError):
    """Input context exceeds model's context window."""

    def __init__(
        self,
        model: str,
        context_length: int,
        max_context: int,
    ) -> None:
        """
        Initialize context too long error.

        Args:
            model: Model name
            context_length: Input context length
            max_context: Model's max context window
        """
        info: dict[str, Any] = {
            "model": model,
            "context_length": context_length,
            "max_context": max_context,
        }
        summary = (
            f"Context length {context_length} exceeds {model} "
            f"context window of {max_context}"
        )
        super().__init__(
            message=summary,
            code=ErrorCode.CONTEXT_TOO_LONG,
            details=info,
        )
        self.model = model
        self.context_length = context_length
        self.max_context = max_context
class ConfigurationError(LLMGatewayError):
    """Configuration error."""

    def __init__(
        self,
        message: str,
        config_key: str | None = None,
    ) -> None:
        """
        Initialize configuration error.

        Args:
            message: Error message
            config_key: Configuration key that's problematic
        """
        # Only attach the offending key when the caller identified one.
        info: dict[str, Any] = {"config_key": config_key} if config_key else {}

        super().__init__(
            message=message,
            code=ErrorCode.CONFIGURATION_ERROR,
            details=info,
        )
        self.config_key = config_key
||||
Reference in New Issue
Block a user