"""
Syndarix Knowledge Base MCP Server.

Exposes RAG (retrieval-augmented generation) tooling over MCP. The planned
feature set is:
- pgvector for semantic search
- Per-project collection isolation
- Hybrid search (vector + keyword)
- Chunking strategies for code, markdown, and text

Every tool in this module is currently a stub that answers with a
``"not_implemented"`` status; the TODO notes in each tool outline the
intended implementation.

Per ADR-008: Knowledge Base RAG Architecture.
"""

import os

from fastmcp import FastMCP

# MCP server instance; the tools below register themselves on it.
mcp = FastMCP(
    "syndarix-knowledge-base",
    description="RAG with pgvector for semantic search",
)

# Connection settings read from the environment.
# DATABASE_URL has no fallback, so it is None when the variable is unset.
DATABASE_URL = os.getenv("DATABASE_URL")
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")


def _not_implemented(project_id: str, **echoed: str) -> dict:
    """Build the placeholder response shared by every stubbed tool.

    Args:
        project_id: Project identifier echoed back to the caller.
        **echoed: Extra request fields to echo back, in the order given.

    Returns:
        A dict with ``status`` set to ``"not_implemented"``, followed by
        ``project_id`` and then the echoed fields.
    """
    return {"status": "not_implemented", "project_id": project_id, **echoed}


@mcp.tool()
async def search_knowledge(
    project_id: str,
    query: str,
    top_k: int = 10,
    search_type: str = "hybrid",
    filters: dict | None = None,
) -> dict:
    """
    Search the project knowledge base.

    Not implemented yet: the call only echoes ``project_id`` and ``query``
    with a ``"not_implemented"`` status. ``top_k``, ``search_type`` and
    ``filters`` are accepted but currently unused.

    Args:
        project_id: UUID of the project (scopes to project collection)
        query: Search query text
        top_k: Number of results to return
        search_type: Search type (semantic, keyword, hybrid)
        filters: Optional filters (file_type, path_prefix, etc.)

    Returns:
        Planned: list of matching documents with scores.
        Current: placeholder dict with status, project_id and query.
    """
    # TODO: Implement pgvector search
    # 1. Generate query embedding via LLM Gateway
    # 2. Search project-scoped collection
    # 3. Apply filters
    # 4. Return results with scores
    return _not_implemented(project_id, query=query)


@mcp.tool()
async def ingest_document(
    project_id: str,
    content: str,
    source_path: str,
    doc_type: str = "text",
    metadata: dict | None = None,
) -> dict:
    """
    Ingest a document into the knowledge base.

    Not implemented yet: the call only echoes ``project_id`` and
    ``source_path`` with a ``"not_implemented"`` status. ``content``,
    ``doc_type`` and ``metadata`` are accepted but currently unused.

    Args:
        project_id: UUID of the project
        content: Document content
        source_path: Original file path for reference
        doc_type: Document type (code, markdown, text)
        metadata: Additional metadata

    Returns:
        Planned: ingestion result with chunk count.
        Current: placeholder dict with status, project_id and source_path.
    """
    # TODO: Implement document ingestion
    # 1. Apply chunking strategy based on doc_type
    # 2. Generate embeddings for chunks
    # 3. Store in project collection
    return _not_implemented(project_id, source_path=source_path)


@mcp.tool()
async def ingest_repository(
    project_id: str,
    repo_path: str,
    include_patterns: list[str] | None = None,
    exclude_patterns: list[str] | None = None,
) -> dict:
    """
    Ingest an entire repository into the knowledge base.

    Not implemented yet: the call only echoes ``project_id`` and
    ``repo_path`` with a ``"not_implemented"`` status. The include and
    exclude patterns are accepted but currently unused.

    Args:
        project_id: UUID of the project
        repo_path: Path to the repository
        include_patterns: Glob patterns to include (e.g., ["*.py", "*.md"])
        exclude_patterns: Glob patterns to exclude (e.g., ["node_modules/*"])

    Returns:
        Planned: ingestion summary with file and chunk counts.
        Current: placeholder dict with status, project_id and repo_path.
    """
    # TODO: Implement repository ingestion
    return _not_implemented(project_id, repo_path=repo_path)


@mcp.tool()
async def delete_document(
    project_id: str,
    source_path: str,
) -> dict:
    """
    Delete a document from the knowledge base.

    Not implemented yet: the call only echoes its arguments with a
    ``"not_implemented"`` status.

    Args:
        project_id: UUID of the project
        source_path: Original file path

    Returns:
        Planned: deletion result.
        Current: placeholder dict with status, project_id and source_path.
    """
    # TODO: Implement document deletion
    return _not_implemented(project_id, source_path=source_path)


@mcp.tool()
async def get_collection_stats(project_id: str) -> dict:
    """
    Get statistics for a project's knowledge base collection.

    Not implemented yet: the call only echoes ``project_id`` with a
    ``"not_implemented"`` status.

    Args:
        project_id: UUID of the project

    Returns:
        Planned: collection statistics (document count, chunk count, etc.).
        Current: placeholder dict with status and project_id.
    """
    # TODO: Implement collection stats
    return _not_implemented(project_id)


if __name__ == "__main__":
    mcp.run()