Files
syndarix/mcp-servers/knowledge-base/server.py
Felipe Cardoso 2310c8cdfd feat: Add MCP server stubs, development docs, and Docker updates
- Add MCP server skeleton implementations for all 7 planned servers
  (llm-gateway, knowledge-base, git, issues, filesystem, code-analysis, cicd)
- Add comprehensive DEVELOPMENT.md with setup and usage instructions
- Add BACKLOG.md with detailed phase planning
- Update docker-compose.dev.yml with Redis and Celery workers
- Update CLAUDE.md with Syndarix-specific context

Addresses issues #16, #20, #21

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-30 02:13:16 +01:00

163 lines
3.8 KiB
Python

"""
Syndarix Knowledge Base MCP Server.
Provides RAG capabilities with:
- pgvector for semantic search
- Per-project collection isolation
- Hybrid search (vector + keyword)
- Chunking strategies for code, markdown, and text
Per ADR-008: Knowledge Base RAG Architecture.
"""
import os
from fastmcp import FastMCP
# FastMCP application object. The @mcp.tool() decorated coroutines below
# register themselves against this instance at import time.
mcp = FastMCP(
    "syndarix-knowledge-base",
    description="RAG with pgvector for semantic search",
)

# Connection settings are injected via the environment.
# DATABASE_URL has no default — presumably required at runtime; TODO confirm.
DATABASE_URL = os.getenv("DATABASE_URL")
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
@mcp.tool()
async def search_knowledge(
    project_id: str,
    query: str,
    top_k: int = 10,
    search_type: str = "hybrid",
    filters: dict | None = None,
) -> dict:
    """
    Search the project knowledge base.

    Args:
        project_id: UUID of the project (scopes to project collection)
        query: Search query text
        top_k: Number of results to return
        search_type: Search type (semantic, keyword, hybrid)
        filters: Optional filters (file_type, path_prefix, etc.)

    Returns:
        List of matching documents with scores
    """
    # Stub pending the pgvector implementation (ADR-008):
    #   embed query via LLM Gateway -> search the project-scoped
    #   collection -> apply filters -> return scored results.
    response: dict = {"status": "not_implemented"}
    response["project_id"] = project_id
    response["query"] = query
    return response
@mcp.tool()
async def ingest_document(
    project_id: str,
    content: str,
    source_path: str,
    doc_type: str = "text",
    metadata: dict | None = None,
) -> dict:
    """
    Ingest a document into the knowledge base.

    Args:
        project_id: UUID of the project
        content: Document content
        source_path: Original file path for reference
        doc_type: Document type (code, markdown, text)
        metadata: Additional metadata

    Returns:
        Ingestion result with chunk count
    """
    # Stub pending ingestion pipeline: chunk by doc_type, embed the
    # chunks, then persist them into the project collection.
    return dict(
        status="not_implemented",
        project_id=project_id,
        source_path=source_path,
    )
@mcp.tool()
async def ingest_repository(
    project_id: str,
    repo_path: str,
    include_patterns: list[str] | None = None,
    exclude_patterns: list[str] | None = None,
) -> dict:
    """
    Ingest an entire repository into the knowledge base.

    Args:
        project_id: UUID of the project
        repo_path: Path to the repository
        include_patterns: Glob patterns to include (e.g., ["*.py", "*.md"])
        exclude_patterns: Glob patterns to exclude (e.g., ["node_modules/*"])

    Returns:
        Ingestion summary with file and chunk counts
    """
    # Placeholder until bulk repository ingestion is implemented.
    summary = {
        "status": "not_implemented",
        "project_id": project_id,
        "repo_path": repo_path,
    }
    return summary
@mcp.tool()
async def delete_document(
    project_id: str,
    source_path: str,
) -> dict:
    """
    Delete a document from the knowledge base.

    Args:
        project_id: UUID of the project
        source_path: Original file path

    Returns:
        Deletion result
    """
    # Placeholder until document deletion is implemented.
    return dict(
        status="not_implemented",
        project_id=project_id,
        source_path=source_path,
    )
@mcp.tool()
async def get_collection_stats(project_id: str) -> dict:
    """
    Get statistics for a project's knowledge base collection.

    Args:
        project_id: UUID of the project

    Returns:
        Collection statistics (document count, chunk count, etc.)
    """
    # Placeholder until collection statistics are implemented.
    stats: dict = {"status": "not_implemented"}
    stats["project_id"] = project_id
    return stats
# Entry point: start the MCP server when this file is executed directly.
if __name__ == "__main__":
    mcp.run()