forked from cardosofelipe/fast-next-template
feat: Add MCP server stubs, development docs, and Docker updates
- Add MCP server skeleton implementations for all 7 planned servers (llm-gateway, knowledge-base, git, issues, filesystem, code-analysis, cicd)
- Add comprehensive DEVELOPMENT.md with setup and usage instructions
- Add BACKLOG.md with detailed phase planning
- Update docker-compose.dev.yml with Redis and Celery workers
- Update CLAUDE.md with Syndarix-specific context

Addresses issues #16, #20, #21

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
23
mcp-servers/llm-gateway/pyproject.toml
Normal file
23
mcp-servers/llm-gateway/pyproject.toml
Normal file
@@ -0,0 +1,23 @@
# Packaging metadata for the Syndarix LLM Gateway MCP server (PEP 621).
[project]
name = "syndarix-mcp-llm-gateway"
version = "0.1.0"
description = "Syndarix LLM Gateway MCP Server - Unified LLM access with failover and cost tracking"
requires-python = ">=3.12"
dependencies = [
    "fastmcp>=0.1.0",
    "litellm>=1.50.0",
    "redis>=5.0.0",
    "pydantic>=2.0.0",
    "pydantic-settings>=2.0.0",
]

# Development-only tooling; install with `pip install -e .[dev]`.
[project.optional-dependencies]
dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
    "ruff>=0.8.0",
]

# Lint/format configuration matching requires-python above.
[tool.ruff]
target-version = "py312"
line-length = 88
||||
148
mcp-servers/llm-gateway/server.py
Normal file
148
mcp-servers/llm-gateway/server.py
Normal file
@@ -0,0 +1,148 @@
"""
Syndarix LLM Gateway MCP Server.

Provides unified LLM access with:
- Multi-provider support (Claude, GPT, Gemini, Qwen, DeepSeek)
- Automatic failover chains
- Cost tracking via LiteLLM callbacks
- Model group routing (high-reasoning, code-generation, fast-response, cost-optimized)

Per ADR-004: LLM Provider Abstraction.
"""

import os

from fastmcp import FastMCP

# Create the MCP server; the @mcp.tool() decorators below register tools on it.
mcp = FastMCP(
    "syndarix-llm-gateway",
    description="Unified LLM access with failover and cost tracking",
)

# Configuration, read from the environment so deployments stay 12-factor.
# REDIS_URL falls back to a local Redis instance for development.
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
# DATABASE_URL has no default and will be None when unset -- NOTE(review):
# confirm whether the server should fail fast instead of deferring the error.
DATABASE_URL = os.getenv("DATABASE_URL")
@mcp.tool()
|
||||
async def chat_completion(
|
||||
project_id: str,
|
||||
agent_id: str,
|
||||
messages: list[dict],
|
||||
model_group: str = "high-reasoning",
|
||||
max_tokens: int = 4096,
|
||||
temperature: float = 0.7,
|
||||
) -> dict:
|
||||
"""
|
||||
Generate a chat completion using the specified model group.
|
||||
|
||||
Args:
|
||||
project_id: UUID of the project (required for cost attribution)
|
||||
agent_id: UUID of the agent instance making the request
|
||||
messages: List of message dicts with 'role' and 'content'
|
||||
model_group: Model routing group (high-reasoning, code-generation, fast-response, cost-optimized, self-hosted)
|
||||
max_tokens: Maximum tokens to generate
|
||||
temperature: Sampling temperature (0.0-2.0)
|
||||
|
||||
Returns:
|
||||
Completion response with content and usage statistics
|
||||
"""
|
||||
# TODO: Implement with LiteLLM
|
||||
# 1. Map model_group to primary model + fallbacks
|
||||
# 2. Check project budget before making request
|
||||
# 3. Make completion request with failover
|
||||
# 4. Log usage via callback
|
||||
# 5. Return response
|
||||
return {
|
||||
"status": "not_implemented",
|
||||
"project_id": project_id,
|
||||
"agent_id": agent_id,
|
||||
"model_group": model_group,
|
||||
}
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def get_embeddings(
|
||||
project_id: str,
|
||||
texts: list[str],
|
||||
model: str = "text-embedding-3-small",
|
||||
) -> dict:
|
||||
"""
|
||||
Generate embeddings for the given texts.
|
||||
|
||||
Args:
|
||||
project_id: UUID of the project (required for cost attribution)
|
||||
texts: List of texts to embed
|
||||
model: Embedding model to use
|
||||
|
||||
Returns:
|
||||
List of embedding vectors
|
||||
"""
|
||||
# TODO: Implement with LiteLLM embeddings
|
||||
return {
|
||||
"status": "not_implemented",
|
||||
"project_id": project_id,
|
||||
"text_count": len(texts),
|
||||
}
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def get_budget_status(project_id: str) -> dict:
|
||||
"""
|
||||
Get current budget status for a project.
|
||||
|
||||
Args:
|
||||
project_id: UUID of the project
|
||||
|
||||
Returns:
|
||||
Budget status with usage, limits, and percentage
|
||||
"""
|
||||
# TODO: Implement budget check from Redis
|
||||
return {
|
||||
"status": "not_implemented",
|
||||
"project_id": project_id,
|
||||
}
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def list_available_models() -> dict:
|
||||
"""
|
||||
List all available models and their capabilities.
|
||||
|
||||
Returns:
|
||||
Dictionary of model groups and available models
|
||||
"""
|
||||
return {
|
||||
"model_groups": {
|
||||
"high-reasoning": {
|
||||
"primary": "claude-opus-4-5",
|
||||
"fallbacks": ["gpt-5.1-codex-max", "gemini-3-pro"],
|
||||
"description": "Complex analysis, architecture decisions",
|
||||
},
|
||||
"code-generation": {
|
||||
"primary": "gpt-5.1-codex-max",
|
||||
"fallbacks": ["claude-opus-4-5", "deepseek-v3.2"],
|
||||
"description": "Code writing and refactoring",
|
||||
},
|
||||
"fast-response": {
|
||||
"primary": "gemini-3-flash",
|
||||
"fallbacks": ["qwen3-235b", "deepseek-v3.2"],
|
||||
"description": "Quick tasks, simple queries",
|
||||
},
|
||||
"cost-optimized": {
|
||||
"primary": "qwen3-235b",
|
||||
"fallbacks": ["deepseek-v3.2"],
|
||||
"description": "High-volume, non-critical tasks",
|
||||
},
|
||||
"self-hosted": {
|
||||
"primary": "deepseek-v3.2",
|
||||
"fallbacks": ["qwen3-235b"],
|
||||
"description": "Privacy-sensitive, air-gapped",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: start the MCP server and serve the registered tools.
    mcp.run()
|
||||
Reference in New Issue
Block a user