"""
|
|
Syndarix LLM Gateway MCP Server.
|
|
|
|
Provides unified LLM access with:
|
|
- Multi-provider support (Claude, GPT, Gemini, Qwen, DeepSeek)
|
|
- Automatic failover chains
|
|
- Cost tracking via LiteLLM callbacks
|
|
- Model group routing (high-reasoning, code-generation, fast-response, cost-optimized)
|
|
|
|
Per ADR-004: LLM Provider Abstraction.
|
|
"""

import os

from fastmcp import FastMCP

# Create MCP server
mcp = FastMCP(
    "syndarix-llm-gateway",
    description="Unified LLM access with failover and cost tracking",
)

# Configuration
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
DATABASE_URL = os.getenv("DATABASE_URL")
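

# Cost-tracking sketch: a minimal illustration of the "cost tracking via
# LiteLLM callbacks" noted in the module docstring, based on LiteLLM's
# documented success-callback hook. The helper name `_track_usage` and the
# Redis key layout in the comment are hypothetical, not settled design;
# nothing here is registered yet.
def _track_usage(kwargs, completion_response, start_time, end_time):
    """Record per-request spend; LiteLLM computes the cost for us."""
    cost = kwargs.get("response_cost") or 0.0  # USD, filled in by LiteLLM
    metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
    project_id = metadata.get("project_id", "unknown")
    # A real implementation would persist this, e.g.:
    #   redis_client.incrbyfloat(f"budget:{project_id}:spend", cost)
    print(f"[cost] project={project_id} spend=${cost:.6f}")


# Registration would happen once at startup, after `import litellm`:
#   litellm.success_callback = [_track_usage]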


@mcp.tool()
async def chat_completion(
    project_id: str,
    agent_id: str,
    messages: list[dict],
    model_group: str = "high-reasoning",
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> dict:
    """
    Generate a chat completion using the specified model group.

    Args:
        project_id: UUID of the project (required for cost attribution)
        agent_id: UUID of the agent instance making the request
        messages: List of message dicts with 'role' and 'content'
        model_group: Model routing group (high-reasoning, code-generation,
            fast-response, cost-optimized, self-hosted)
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature (0.0-2.0)

    Returns:
        Completion response with content and usage statistics
    """
    # TODO: Implement with LiteLLM
    # 1. Map model_group to primary model + fallbacks
    # 2. Check project budget before making request
    # 3. Make completion request with failover (see sketch below)
    # 4. Log usage via callback
    # 5. Return response
    return {
        "status": "not_implemented",
        "project_id": project_id,
        "agent_id": agent_id,
        "model_group": model_group,
    }
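

# Failover sketch: one way steps 1 and 3 of the TODO above could look with
# LiteLLM's Router, which resolves a deployment name to a concrete model and
# walks fallback chains on error. The provider-prefixed model names, the idea
# of naming deployments after model groups, and the helper itself are
# assumptions; nothing below is wired into chat_completion yet.
async def _completion_with_failover(
    model_group: str,
    messages: list[dict],
    max_tokens: int,
    temperature: float,
    metadata: dict,
) -> dict:
    from litellm import Router  # deferred import: LiteLLM is not a dependency yet

    # A real server would build the Router once at startup, with one entry
    # per model group from list_available_models().
    router = Router(
        model_list=[
            {
                "model_name": "high-reasoning",
                "litellm_params": {"model": "anthropic/claude-opus-4-5"},
            },
            {
                "model_name": "code-generation",
                "litellm_params": {"model": "openai/gpt-5.1-codex-max"},
            },
        ],
        # On failure, retry the request against the other group's deployment.
        fallbacks=[{"high-reasoning": ["code-generation"]}],
    )
    response = await router.acompletion(
        model=model_group,  # Router maps the group name to a deployment
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        metadata=metadata,  # surfaced to callbacks for cost attribution
    )
    return response.model_dump()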


@mcp.tool()
async def get_embeddings(
    project_id: str,
    texts: list[str],
    model: str = "text-embedding-3-small",
) -> dict:
    """
    Generate embeddings for the given texts.

    Args:
        project_id: UUID of the project (required for cost attribution)
        texts: List of texts to embed
        model: Embedding model to use

    Returns:
        List of embedding vectors
    """
    # TODO: Implement with LiteLLM embeddings (see sketch below)
    return {
        "status": "not_implemented",
        "project_id": project_id,
        "text_count": len(texts),
    }
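

# Embeddings sketch: what the TODO above could look like with LiteLLM's async
# embeddings API. The helper is hypothetical and not wired into
# get_embeddings yet; error handling and cost attribution are omitted.
async def _embed_texts(texts: list[str], model: str) -> list[list[float]]:
    import litellm  # deferred import: LiteLLM is not a dependency yet

    response = await litellm.aembedding(model=model, input=texts)
    # response.data holds one entry per input text, in order.
    return [item["embedding"] for item in response.data]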


@mcp.tool()
async def get_budget_status(project_id: str) -> dict:
    """
    Get current budget status for a project.

    Args:
        project_id: UUID of the project

    Returns:
        Budget status with usage, limits, and percentage
    """
    # TODO: Implement budget check from Redis (see sketch below)
    return {
        "status": "not_implemented",
        "project_id": project_id,
    }
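

# Budget-lookup sketch: the Redis read the TODO above calls for, using
# redis-py's asyncio client against REDIS_URL. The key layout
# (budget:<project_id>:spend / :limit) is a hypothetical convention, not
# settled design.
async def _read_budget(project_id: str) -> dict:
    import redis.asyncio as aioredis  # deferred import: redis is not a dependency yet

    client = aioredis.from_url(REDIS_URL, decode_responses=True)
    try:
        spend = float(await client.get(f"budget:{project_id}:spend") or 0.0)
        limit = float(await client.get(f"budget:{project_id}:limit") or 0.0)
    finally:
        await client.aclose()
    return {
        "project_id": project_id,
        "spend_usd": spend,
        "limit_usd": limit,
        "percent_used": round(spend / limit * 100, 1) if limit else None,
    }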


@mcp.tool()
async def list_available_models() -> dict:
    """
    List all available models and their capabilities.

    Returns:
        Dictionary of model groups and available models
    """
    return {
        "model_groups": {
            "high-reasoning": {
                "primary": "claude-opus-4-5",
                "fallbacks": ["gpt-5.1-codex-max", "gemini-3-pro"],
                "description": "Complex analysis, architecture decisions",
            },
            "code-generation": {
                "primary": "gpt-5.1-codex-max",
                "fallbacks": ["claude-opus-4-5", "deepseek-v3.2"],
                "description": "Code writing and refactoring",
            },
            "fast-response": {
                "primary": "gemini-3-flash",
                "fallbacks": ["qwen3-235b", "deepseek-v3.2"],
                "description": "Quick tasks, simple queries",
            },
            "cost-optimized": {
                "primary": "qwen3-235b",
                "fallbacks": ["deepseek-v3.2"],
                "description": "High-volume, non-critical tasks",
            },
            "self-hosted": {
                "primary": "deepseek-v3.2",
                "fallbacks": ["qwen3-235b"],
                "description": "Privacy-sensitive, air-gapped",
            },
        }
    }
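

# Smoke-test sketch: how a caller might exercise the server in-process,
# assuming the fastmcp 2.x in-memory Client (an assumption about the pinned
# fastmcp version; the helper itself is hypothetical).
async def _smoke_test() -> None:
    from fastmcp import Client

    async with Client(mcp) as client:
        result = await client.call_tool("list_available_models", {})
        print(result)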


if __name__ == "__main__":
    mcp.run()