forked from cardosofelipe/fast-next-template
Implements complete LLM Gateway MCP Server with: - FastMCP server with 4 tools: chat_completion, list_models, get_usage, count_tokens - LiteLLM Router with multi-provider failover chains - Circuit breaker pattern for fault tolerance - Redis-based cost tracking per project/agent - Comprehensive test suite (209 tests, 92% coverage) Model groups defined per ADR-004: - reasoning: claude-opus-4 → gpt-4.1 → gemini-2.5-pro - code: claude-sonnet-4 → gpt-4.1 → deepseek-coder - fast: claude-haiku → gpt-4.1-mini → gemini-2.0-flash 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
180 lines
5.2 KiB
Python
"""
|
|
Configuration for LLM Gateway MCP Server.
|
|
|
|
Uses Pydantic Settings for type-safe environment variable handling.
|
|
"""
|
|
|
|
from functools import lru_cache
|
|
|
|
from pydantic import Field, field_validator
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """LLM Gateway configuration settings.

    Values are loaded from environment variables carrying the
    ``LLM_GATEWAY_`` prefix and/or a local ``.env`` file; keys that are
    not declared below are silently ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="LLM_GATEWAY_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # -- Server --------------------------------------------------------
    host: str = Field(default="0.0.0.0", description="Server host")
    port: int = Field(default=8001, description="Server port")
    debug: bool = Field(default=False, description="Debug mode")

    # -- Redis ---------------------------------------------------------
    redis_url: str = Field(default="redis://localhost:6379/0", description="Redis connection URL")
    redis_prefix: str = Field(default="llm_gateway", description="Redis key prefix")
    redis_ttl_hours: int = Field(default=24, description="Default Redis TTL in hours")

    # -- Provider API keys (None means the provider is not configured) --
    anthropic_api_key: str | None = Field(default=None, description="Anthropic API key for Claude models")
    openai_api_key: str | None = Field(default=None, description="OpenAI API key for GPT models")
    google_api_key: str | None = Field(default=None, description="Google API key for Gemini models")
    alibaba_api_key: str | None = Field(default=None, description="Alibaba API key for Qwen models")
    deepseek_api_key: str | None = Field(default=None, description="DeepSeek API key")
    deepseek_base_url: str | None = Field(default=None, description="DeepSeek API base URL (for self-hosted)")

    # -- LiteLLM -------------------------------------------------------
    litellm_timeout: int = Field(default=120, description="LiteLLM request timeout in seconds")
    litellm_max_retries: int = Field(default=3, description="Maximum retries per provider")
    litellm_cache_enabled: bool = Field(default=True, description="Enable Redis caching for LiteLLM")
    litellm_cache_ttl: int = Field(default=3600, description="Cache TTL in seconds")

    # -- Circuit breaker -----------------------------------------------
    circuit_failure_threshold: int = Field(default=5, description="Failures before circuit opens")
    circuit_recovery_timeout: int = Field(default=60, description="Seconds before circuit half-opens")
    circuit_half_open_max_calls: int = Field(default=3, description="Max calls in half-open state")

    # -- Cost tracking -------------------------------------------------
    cost_tracking_enabled: bool = Field(default=True, description="Enable cost tracking")
    cost_alert_threshold: float = Field(default=100.0, description="Cost threshold for alerts (USD)")
    default_budget_limit: float = Field(default=1000.0, description="Default project budget limit (USD)")

    # -- Rate limiting -------------------------------------------------
    rate_limit_enabled: bool = Field(default=True, description="Enable rate limiting")
    rate_limit_requests_per_minute: int = Field(default=60, description="Max requests per minute per project")

    @field_validator("port")
    @classmethod
    def validate_port(cls, v: int) -> int:
        """Reject port numbers outside the valid TCP range."""
        if v < 1 or v > 65535:
            raise ValueError("Port must be between 1 and 65535")
        return v

    @field_validator("redis_ttl_hours")
    @classmethod
    def validate_ttl(cls, v: int) -> int:
        """Reject non-positive Redis TTL values."""
        if v < 1:
            raise ValueError("Redis TTL must be positive")
        return v

    @field_validator("circuit_failure_threshold")
    @classmethod
    def validate_failure_threshold(cls, v: int) -> int:
        """Keep the breaker's failure threshold within a sane window."""
        if v < 1 or v > 100:
            raise ValueError("Failure threshold must be between 1 and 100")
        return v

    @field_validator("litellm_timeout")
    @classmethod
    def validate_timeout(cls, v: int) -> int:
        """Keep the per-request timeout within a sane window."""
        if v < 1 or v > 600:
            raise ValueError("Timeout must be between 1 and 600 seconds")
        return v

    def get_available_providers(self) -> list[str]:
        """Return the names of providers that have credentials configured.

        DeepSeek counts as configured when either an API key or a
        self-hosted base URL is present.
        """
        credentials = (
            ("anthropic", self.anthropic_api_key),
            ("openai", self.openai_api_key),
            ("google", self.google_api_key),
            ("alibaba", self.alibaba_api_key),
            ("deepseek", self.deepseek_api_key or self.deepseek_base_url),
        )
        return [name for name, cred in credentials if cred]

    def has_any_provider(self) -> bool:
        """Return True when at least one provider has credentials."""
        return bool(self.get_available_providers())
|
|
|
|
|
|
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide ``Settings`` instance.

    The first call constructs (and validates) the settings; every later
    call returns the same cached object via ``lru_cache``.
    """
    settings = Settings()
    return settings
|