syndarix/mcp-servers/llm-gateway/server.py
Felipe Cardoso 2310c8cdfd feat: Add MCP server stubs, development docs, and Docker updates
- Add MCP server skeleton implementations for all 7 planned servers
  (llm-gateway, knowledge-base, git, issues, filesystem, code-analysis, cicd)
- Add comprehensive DEVELOPMENT.md with setup and usage instructions
- Add BACKLOG.md with detailed phase planning
- Update docker-compose.dev.yml with Redis and Celery workers
- Update CLAUDE.md with Syndarix-specific context

Addresses issues #16, #20, #21

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-30 02:13:16 +01:00


"""
Syndarix LLM Gateway MCP Server.
Provides unified LLM access with:
- Multi-provider support (Claude, GPT, Gemini, Qwen, DeepSeek)
- Automatic failover chains
- Cost tracking via LiteLLM callbacks
- Model group routing (high-reasoning, code-generation, fast-response, cost-optimized)
Per ADR-004: LLM Provider Abstraction.
"""
import os
from fastmcp import FastMCP
# Create MCP server
mcp = FastMCP(
"syndarix-llm-gateway",
description="Unified LLM access with failover and cost tracking",
)
# Configuration
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
DATABASE_URL = os.getenv("DATABASE_URL")
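
# A minimal sketch of the "cost tracking via LiteLLM callbacks" mentioned in the
# module docstring, kept out of the import path until the tools below are
# implemented. Assumptions: the litellm and redis packages are installed, and
# per-project spend accumulates under an illustrative "llm_cost:{project_id}"
# Redis key (not a confirmed schema).
def _register_cost_tracking() -> None:
    import litellm
    import redis

    def _track_cost(kwargs, completion_response, start_time, end_time):
        # LiteLLM hands success callbacks the original call kwargs; the
        # project_id is expected to travel in the request metadata.
        metadata = kwargs.get("litellm_params", {}).get("metadata") or {}
        project_id = metadata.get("project_id", "unknown")
        cost = litellm.completion_cost(completion_response=completion_response)
        redis.from_url(REDIS_URL).incrbyfloat(f"llm_cost:{project_id}", cost)

    litellm.success_callback = [_track_cost]
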
@mcp.tool()
async def chat_completion(
    project_id: str,
    agent_id: str,
    messages: list[dict],
    model_group: str = "high-reasoning",
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> dict:
    """
    Generate a chat completion using the specified model group.

    Args:
        project_id: UUID of the project (required for cost attribution)
        agent_id: UUID of the agent instance making the request
        messages: List of message dicts with 'role' and 'content'
        model_group: Model routing group (high-reasoning, code-generation, fast-response, cost-optimized, self-hosted)
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature (0.0-2.0)

    Returns:
        Completion response with content and usage statistics
    """
    # TODO: Implement with LiteLLM
    # 1. Map model_group to primary model + fallbacks
    # 2. Check project budget before making request
    # 3. Make completion request with failover
    # 4. Log usage via callback
    # 5. Return response
    return {
        "status": "not_implemented",
        "project_id": project_id,
        "agent_id": agent_id,
        "model_group": model_group,
    }
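
# A minimal sketch of the failover path (steps 1 and 3 above) using LiteLLM's
# Router. Assumptions: litellm is installed, provider API keys are set in the
# environment, and the model names (mirroring list_available_models()) plus the
# fallback mapping are illustrative; the real mapping would come from
# configuration, and the Router would be built once at startup, not per call.
async def _completion_with_failover(
    model_group: str,
    messages: list[dict],
    max_tokens: int,
    temperature: float,
) -> dict:
    from litellm import Router

    # One Router model group per entry; a request addressed to a group name is
    # routed to its deployment, and the fallbacks map is tried on failure.
    router = Router(
        model_list=[
            {"model_name": "high-reasoning", "litellm_params": {"model": "claude-opus-4-5"}},
            {"model_name": "code-generation", "litellm_params": {"model": "gpt-5.1-codex-max"}},
        ],
        fallbacks=[{"high-reasoning": ["code-generation"]}],
    )
    response = await router.acompletion(
        model=model_group,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # Return only the fields the tool contract above cares about.
    return {
        "content": response.choices[0].message.content,
        "model": response.model,
        "usage": {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
        },
    }
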
@mcp.tool()
async def get_embeddings(
    project_id: str,
    texts: list[str],
    model: str = "text-embedding-3-small",
) -> dict:
    """
    Generate embeddings for the given texts.

    Args:
        project_id: UUID of the project (required for cost attribution)
        texts: List of texts to embed
        model: Embedding model to use

    Returns:
        List of embedding vectors
    """
    # TODO: Implement with LiteLLM embeddings
    return {
        "status": "not_implemented",
        "project_id": project_id,
        "text_count": len(texts),
    }
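
# A minimal sketch of how get_embeddings could be backed by LiteLLM once
# implemented (assumption: litellm is installed). aembedding() returns an
# OpenAI-style response whose data entries carry the vectors.
async def _embed_texts(texts: list[str], model: str) -> list[list[float]]:
    from litellm import aembedding

    response = await aembedding(model=model, input=texts)
    return [item["embedding"] for item in response.data]
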
@mcp.tool()
async def get_budget_status(project_id: str) -> dict:
    """
    Get current budget status for a project.

    Args:
        project_id: UUID of the project

    Returns:
        Budget status with usage, limits, and percentage
    """
    # TODO: Implement budget check from Redis
    return {
        "status": "not_implemented",
        "project_id": project_id,
    }
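
# A minimal sketch of the Redis-backed budget check referenced in the TODO
# above, using redis.asyncio. The "budget:{project_id}" and
# "llm_cost:{project_id}" keys are illustrative, not a confirmed schema.
async def _budget_status_from_redis(project_id: str) -> dict:
    import redis.asyncio as aioredis

    r = aioredis.from_url(REDIS_URL, decode_responses=True)
    try:
        limit = float(await r.get(f"budget:{project_id}") or 0.0)
        spent = float(await r.get(f"llm_cost:{project_id}") or 0.0)
    finally:
        await r.aclose()
    return {
        "project_id": project_id,
        "limit_usd": limit,
        "spent_usd": spent,
        "percent_used": (spent / limit * 100) if limit else None,
    }
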
@mcp.tool()
async def list_available_models() -> dict:
    """
    List all available models and their capabilities.

    Returns:
        Dictionary of model groups and available models
    """
    return {
        "model_groups": {
            "high-reasoning": {
                "primary": "claude-opus-4-5",
                "fallbacks": ["gpt-5.1-codex-max", "gemini-3-pro"],
                "description": "Complex analysis, architecture decisions",
            },
            "code-generation": {
                "primary": "gpt-5.1-codex-max",
                "fallbacks": ["claude-opus-4-5", "deepseek-v3.2"],
                "description": "Code writing and refactoring",
            },
            "fast-response": {
                "primary": "gemini-3-flash",
                "fallbacks": ["qwen3-235b", "deepseek-v3.2"],
                "description": "Quick tasks, simple queries",
            },
            "cost-optimized": {
                "primary": "qwen3-235b",
                "fallbacks": ["deepseek-v3.2"],
                "description": "High-volume, non-critical tasks",
            },
            "self-hosted": {
                "primary": "deepseek-v3.2",
                "fallbacks": ["qwen3-235b"],
                "description": "Privacy-sensitive, air-gapped",
            },
        }
    }

if __name__ == "__main__":
    mcp.run()