Files
Felipe Cardoso 6e8b0b022a feat(llm-gateway): implement LLM Gateway MCP Server (#56)
Implements complete LLM Gateway MCP Server with:
- FastMCP server with 4 tools: chat_completion, list_models, get_usage, count_tokens
- LiteLLM Router with multi-provider failover chains
- Circuit breaker pattern for fault tolerance
- Redis-based cost tracking per project/agent
- Comprehensive test suite (209 tests, 92% coverage)

Model groups defined per ADR-004:
- reasoning: claude-opus-4 → gpt-4.1 → gemini-2.5-pro
- code: claude-sonnet-4 → gpt-4.1 → deepseek-coder
- fast: claude-haiku → gpt-4.1-mini → gemini-2.0-flash

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 20:31:19 +01:00

180 lines
5.2 KiB
Python

"""
Configuration for LLM Gateway MCP Server.
Uses Pydantic Settings for type-safe environment variable handling.
"""
from functools import lru_cache
from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """LLM Gateway configuration settings.

    Values are loaded by pydantic-settings from environment variables
    carrying the ``LLM_GATEWAY_`` prefix (for example ``LLM_GATEWAY_PORT``)
    and from a UTF-8 encoded ``.env`` file. Unknown keys are ignored.

    Provider API keys are declared with ``repr=False`` so that printing or
    logging a ``Settings`` instance does not expose credentials. The
    attributes themselves are still plain ``str | None`` values, and they
    are still included in ``model_dump()`` output.
    """

    model_config = SettingsConfigDict(
        env_prefix="LLM_GATEWAY_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Server settings
    host: str = Field(default="0.0.0.0", description="Server host")
    port: int = Field(default=8001, description="Server port")
    debug: bool = Field(default=False, description="Debug mode")

    # Redis settings
    redis_url: str = Field(
        default="redis://localhost:6379/0",
        description="Redis connection URL",
    )
    redis_prefix: str = Field(
        default="llm_gateway",
        description="Redis key prefix",
    )
    redis_ttl_hours: int = Field(
        default=24,
        description="Default Redis TTL in hours",
    )

    # Provider API keys (kept out of repr()/str() to avoid leaking secrets)
    anthropic_api_key: str | None = Field(
        default=None,
        description="Anthropic API key for Claude models",
        repr=False,
    )
    openai_api_key: str | None = Field(
        default=None,
        description="OpenAI API key for GPT models",
        repr=False,
    )
    google_api_key: str | None = Field(
        default=None,
        description="Google API key for Gemini models",
        repr=False,
    )
    alibaba_api_key: str | None = Field(
        default=None,
        description="Alibaba API key for Qwen models",
        repr=False,
    )
    deepseek_api_key: str | None = Field(
        default=None,
        description="DeepSeek API key",
        repr=False,
    )
    deepseek_base_url: str | None = Field(
        default=None,
        description="DeepSeek API base URL (for self-hosted)",
    )

    # LiteLLM settings
    litellm_timeout: int = Field(
        default=120,
        description="LiteLLM request timeout in seconds",
    )
    litellm_max_retries: int = Field(
        default=3,
        description="Maximum retries per provider",
    )
    litellm_cache_enabled: bool = Field(
        default=True,
        description="Enable Redis caching for LiteLLM",
    )
    litellm_cache_ttl: int = Field(
        default=3600,
        description="Cache TTL in seconds",
    )

    # Circuit breaker settings
    circuit_failure_threshold: int = Field(
        default=5,
        description="Failures before circuit opens",
    )
    circuit_recovery_timeout: int = Field(
        default=60,
        description="Seconds before circuit half-opens",
    )
    circuit_half_open_max_calls: int = Field(
        default=3,
        description="Max calls in half-open state",
    )

    # Cost tracking settings
    cost_tracking_enabled: bool = Field(
        default=True,
        description="Enable cost tracking",
    )
    cost_alert_threshold: float = Field(
        default=100.0,
        description="Cost threshold for alerts (USD)",
    )
    default_budget_limit: float = Field(
        default=1000.0,
        description="Default project budget limit (USD)",
    )

    # Rate limiting
    rate_limit_enabled: bool = Field(
        default=True,
        description="Enable rate limiting",
    )
    rate_limit_requests_per_minute: int = Field(
        default=60,
        description="Max requests per minute per project",
    )

    @field_validator("port")
    @classmethod
    def validate_port(cls, v: int) -> int:
        """Validate port is in valid range.

        Raises:
            ValueError: If ``v`` is outside 1..65535.
        """
        if not 1 <= v <= 65535:
            raise ValueError("Port must be between 1 and 65535")
        return v

    @field_validator("redis_ttl_hours")
    @classmethod
    def validate_ttl(cls, v: int) -> int:
        """Validate TTL is positive.

        Raises:
            ValueError: If ``v`` is zero or negative.
        """
        if v <= 0:
            raise ValueError("Redis TTL must be positive")
        return v

    @field_validator("circuit_failure_threshold")
    @classmethod
    def validate_failure_threshold(cls, v: int) -> int:
        """Validate failure threshold is reasonable.

        Raises:
            ValueError: If ``v`` is outside 1..100.
        """
        if not 1 <= v <= 100:
            raise ValueError("Failure threshold must be between 1 and 100")
        return v

    @field_validator("litellm_timeout")
    @classmethod
    def validate_timeout(cls, v: int) -> int:
        """Validate timeout is reasonable.

        Raises:
            ValueError: If ``v`` is outside 1..600 seconds.
        """
        if not 1 <= v <= 600:
            raise ValueError("Timeout must be between 1 and 600 seconds")
        return v

    def get_available_providers(self) -> list[str]:
        """Get list of providers with configured API keys.

        A provider counts as configured when its credential is a non-empty
        string. DeepSeek also counts when only ``deepseek_base_url`` is set.

        Returns:
            Provider names in the fixed order anthropic, openai, google,
            alibaba, deepseek; empty when nothing is configured.
        """
        # Tuple of pairs keeps the reporting order explicit and stable.
        credentials = (
            ("anthropic", self.anthropic_api_key),
            ("openai", self.openai_api_key),
            ("google", self.google_api_key),
            ("alibaba", self.alibaba_api_key),
            ("deepseek", self.deepseek_api_key or self.deepseek_base_url),
        )
        return [name for name, credential in credentials if credential]

    def has_any_provider(self) -> bool:
        """Check if at least one provider is configured."""
        return bool(self.get_available_providers())
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, creating it on first call.

    The call takes no arguments, so ``lru_cache`` stores a single result:
    ``Settings()`` is constructed once and every later call returns that
    same object. Use ``get_settings.cache_clear()`` to force a rebuild.
    """
    settings = Settings()
    return settings