feat(knowledge-base): implement Knowledge Base MCP Server (#57)

Implements RAG capabilities with pgvector for semantic search:

- Intelligent chunking strategies (code-aware, markdown-aware, text)
- Semantic search with vector similarity (HNSW index)
- Keyword search with PostgreSQL full-text search
- Hybrid search using Reciprocal Rank Fusion (RRF)
- Redis caching for embeddings
- Collection management (ingest, search, delete, stats)
- FastMCP tools: search_knowledge, ingest_content, delete_content,
  list_collections, get_collection_stats, update_document

Testing:
- 128 comprehensive tests covering all components
- 58% code coverage (database integration tests use mocks)
- Passes ruff linting and mypy type checking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-03 21:33:26 +01:00
parent 18d717e996
commit d0fc7f37ff
26 changed files with 9530 additions and 120 deletions

View File

@@ -0,0 +1,409 @@
"""
Custom exceptions for Knowledge Base MCP Server.
Provides structured error handling with error codes and details.
"""
from enum import Enum
from typing import Any
class ErrorCode(str, Enum):
    """
    Machine-readable identifiers for Knowledge Base failures.

    Each member is also a ``str`` (the enum mixes in ``str``), and every
    value is the member name prefixed with ``KB_``.
    """

    # --- General ---
    UNKNOWN_ERROR = "KB_UNKNOWN_ERROR"
    INVALID_REQUEST = "KB_INVALID_REQUEST"
    INTERNAL_ERROR = "KB_INTERNAL_ERROR"

    # --- Database ---
    DATABASE_CONNECTION_ERROR = "KB_DATABASE_CONNECTION_ERROR"
    DATABASE_QUERY_ERROR = "KB_DATABASE_QUERY_ERROR"
    DATABASE_INTEGRITY_ERROR = "KB_DATABASE_INTEGRITY_ERROR"

    # --- Embedding ---
    EMBEDDING_GENERATION_ERROR = "KB_EMBEDDING_GENERATION_ERROR"
    EMBEDDING_DIMENSION_MISMATCH = "KB_EMBEDDING_DIMENSION_MISMATCH"
    EMBEDDING_RATE_LIMIT = "KB_EMBEDDING_RATE_LIMIT"

    # --- Chunking ---
    CHUNKING_ERROR = "KB_CHUNKING_ERROR"
    UNSUPPORTED_FILE_TYPE = "KB_UNSUPPORTED_FILE_TYPE"
    FILE_TOO_LARGE = "KB_FILE_TOO_LARGE"
    ENCODING_ERROR = "KB_ENCODING_ERROR"

    # --- Search ---
    SEARCH_ERROR = "KB_SEARCH_ERROR"
    INVALID_SEARCH_TYPE = "KB_INVALID_SEARCH_TYPE"
    SEARCH_TIMEOUT = "KB_SEARCH_TIMEOUT"

    # --- Collection ---
    COLLECTION_NOT_FOUND = "KB_COLLECTION_NOT_FOUND"
    COLLECTION_ALREADY_EXISTS = "KB_COLLECTION_ALREADY_EXISTS"

    # --- Document ---
    DOCUMENT_NOT_FOUND = "KB_DOCUMENT_NOT_FOUND"
    DOCUMENT_ALREADY_EXISTS = "KB_DOCUMENT_ALREADY_EXISTS"
    INVALID_DOCUMENT = "KB_INVALID_DOCUMENT"

    # --- Project ---
    PROJECT_NOT_FOUND = "KB_PROJECT_NOT_FOUND"
    PROJECT_ACCESS_DENIED = "KB_PROJECT_ACCESS_DENIED"
class KnowledgeBaseError(Exception):
    """
    Base exception for Knowledge Base errors.

    Carries a human-readable message, an ``ErrorCode`` for programmatic
    handling, a dictionary of structured details and, optionally, the
    exception that triggered this one.
    """

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.UNKNOWN_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Initialize Knowledge Base error.

        Args:
            message: Human-readable error message.
            code: Error code for programmatic handling.
            details: Additional error details. The mapping is shallow-copied,
                so the caller's dict and ``self.details`` never alias.
            cause: Original exception that caused this error. When given, it
                is also linked as ``__cause__`` so it shows up in tracebacks.
        """
        super().__init__(message)
        self.message = message
        self.code = code
        # Copy instead of storing the caller's object: a dict reused across
        # several errors must not have them share (and overwrite) state.
        self.details: dict[str, Any] = dict(details) if details else {}
        self.cause = cause
        # Only link real exceptions. Assigning ``__cause__`` (even to None)
        # sets ``__suppress_context__``, and a non-exception value raises
        # TypeError, so anything else is left untouched.
        if isinstance(cause, BaseException):
            self.__cause__ = cause

    def to_dict(self) -> dict[str, Any]:
        """
        Convert error to dictionary for JSON response.

        Returns:
            A dict with ``"error"`` (the code's string value) and
            ``"message"``; ``"details"`` is included only when non-empty.
            ``cause`` is not part of the output.
        """
        result: dict[str, Any] = {
            "error": self.code.value,
            "message": self.message,
        }
        if self.details:
            # Hand out a copy so edits to the response payload cannot
            # change the error object itself.
            result["details"] = dict(self.details)
        return result

    def __str__(self) -> str:
        """Return ``"[<code value>] <message>"``."""
        return f"[{self.code.value}] {self.message}"

    def __repr__(self) -> str:
        """Return a constructor-like representation with message, code and details."""
        return (
            f"{self.__class__.__name__}("
            f"message={self.message!r}, "
            f"code={self.code.value!r}, "
            f"details={self.details!r})"
        )
# Database Errors
class DatabaseError(KnowledgeBaseError):
    """Common parent for every database-related failure."""

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.DATABASE_QUERY_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Forward all arguments to the base error.

        Args:
            message: Human-readable error message.
            code: Error code; defaults to ``DATABASE_QUERY_ERROR``.
            details: Additional error details.
            cause: Original exception that caused this error.
        """
        super().__init__(
            message=message,
            code=code,
            details=details,
            cause=cause,
        )
class DatabaseConnectionError(DatabaseError):
    """Raised when a connection to the database cannot be established."""

    def __init__(
        self,
        message: str = "Failed to connect to database",
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``DATABASE_CONNECTION_ERROR``.

        Args:
            message: Human-readable error message.
            details: Additional error details.
            cause: Original exception that caused this error.
        """
        super().__init__(
            message=message,
            code=ErrorCode.DATABASE_CONNECTION_ERROR,
            details=details,
            cause=cause,
        )
class DatabaseQueryError(DatabaseError):
    """Database query failed."""

    def __init__(
        self,
        message: str,
        query: str | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``DATABASE_QUERY_ERROR``.

        Args:
            message: Human-readable error message.
            query: Query text; stored under ``details["query"]`` when
                non-empty.
            details: Additional error details. The mapping is copied and
                never modified.
            cause: Original exception that caused this error.
        """
        # Copy first so the caller's dict is not modified as a side effect.
        merged: dict[str, Any] = dict(details) if details else {}
        if query:
            # NOTE(review): the query text ends up in ``to_dict()`` output;
            # confirm callers never pass statements with inlined secrets.
            merged["query"] = query
        super().__init__(message, ErrorCode.DATABASE_QUERY_ERROR, merged, cause)
# Embedding Errors
class EmbeddingError(KnowledgeBaseError):
    """Common parent for every embedding-related failure."""

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.EMBEDDING_GENERATION_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Forward all arguments to the base error.

        Args:
            message: Human-readable error message.
            code: Error code; defaults to ``EMBEDDING_GENERATION_ERROR``.
            details: Additional error details.
            cause: Original exception that caused this error.
        """
        super().__init__(
            message=message,
            code=code,
            details=details,
            cause=cause,
        )
class EmbeddingGenerationError(EmbeddingError):
    """Failed to generate embeddings."""

    def __init__(
        self,
        message: str = "Failed to generate embeddings",
        texts_count: int | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``EMBEDDING_GENERATION_ERROR``.

        Args:
            message: Human-readable error message.
            texts_count: Number of texts involved; stored under
                ``details["texts_count"]`` when not ``None`` (``0`` is kept).
            details: Additional error details. The mapping is copied and
                never modified.
            cause: Original exception that caused this error.
        """
        # Copy first so the caller's dict is not modified as a side effect.
        merged: dict[str, Any] = dict(details) if details else {}
        if texts_count is not None:
            merged["texts_count"] = texts_count
        super().__init__(message, ErrorCode.EMBEDDING_GENERATION_ERROR, merged, cause)
class EmbeddingDimensionMismatchError(EmbeddingError):
    """Embedding dimension doesn't match expected dimension."""

    def __init__(
        self,
        expected: int,
        actual: int,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``EMBEDDING_DIMENSION_MISMATCH``.

        The message is generated from ``expected`` and ``actual``.

        Args:
            expected: Expected dimension; stored as
                ``details["expected_dimension"]``.
            actual: Dimension actually received; stored as
                ``details["actual_dimension"]``.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect; the two dimension keys override same-named entries.
        merged: dict[str, Any] = {
            **(details or {}),
            "expected_dimension": expected,
            "actual_dimension": actual,
        }
        message = f"Embedding dimension mismatch: expected {expected}, got {actual}"
        super().__init__(message, ErrorCode.EMBEDDING_DIMENSION_MISMATCH, merged)
# Chunking Errors
class ChunkingError(KnowledgeBaseError):
    """Common parent for every chunking-related failure."""

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.CHUNKING_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Forward all arguments to the base error.

        Args:
            message: Human-readable error message.
            code: Error code; defaults to ``CHUNKING_ERROR``.
            details: Additional error details.
            cause: Original exception that caused this error.
        """
        super().__init__(
            message=message,
            code=code,
            details=details,
            cause=cause,
        )
class UnsupportedFileTypeError(ChunkingError):
    """File type is not supported for chunking."""

    def __init__(
        self,
        file_type: str,
        supported_types: list[str] | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``UNSUPPORTED_FILE_TYPE``.

        The message is generated from ``file_type``.

        Args:
            file_type: The rejected file type; stored as
                ``details["file_type"]``.
            supported_types: Accepted file types; stored as
                ``details["supported_types"]`` when non-empty.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "file_type": file_type}
        if supported_types:
            merged["supported_types"] = supported_types
        message = f"Unsupported file type: {file_type}"
        super().__init__(message, ErrorCode.UNSUPPORTED_FILE_TYPE, merged)
class FileTooLargeError(ChunkingError):
    """File exceeds maximum allowed size."""

    def __init__(
        self,
        file_size: int,
        max_size: int,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``FILE_TOO_LARGE``.

        The message is generated from both sizes and reports them in bytes.

        Args:
            file_size: Size of the rejected file; stored as
                ``details["file_size"]``.
            max_size: Configured size limit; stored as ``details["max_size"]``.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect; the two size keys override same-named entries.
        merged: dict[str, Any] = {
            **(details or {}),
            "file_size": file_size,
            "max_size": max_size,
        }
        message = f"File too large: {file_size} bytes exceeds limit of {max_size} bytes"
        super().__init__(message, ErrorCode.FILE_TOO_LARGE, merged)
class EncodingError(ChunkingError):
    """Failed to decode file content."""

    def __init__(
        self,
        message: str = "Failed to decode file content",
        encoding: str | None = None,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``ENCODING_ERROR``.

        Args:
            message: Human-readable error message.
            encoding: Name of the encoding involved; stored as
                ``details["encoding"]`` when non-empty.
            details: Additional error details. The mapping is copied and
                never modified.
            cause: Original exception that caused this error.
        """
        # Copy first so the caller's dict is not modified as a side effect.
        merged: dict[str, Any] = dict(details) if details else {}
        if encoding:
            merged["encoding"] = encoding
        super().__init__(message, ErrorCode.ENCODING_ERROR, merged, cause)
# Search Errors
class SearchError(KnowledgeBaseError):
    """Common parent for every search-related failure."""

    def __init__(
        self,
        message: str,
        code: ErrorCode = ErrorCode.SEARCH_ERROR,
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Forward all arguments to the base error.

        Args:
            message: Human-readable error message.
            code: Error code; defaults to ``SEARCH_ERROR``.
            details: Additional error details.
            cause: Original exception that caused this error.
        """
        super().__init__(
            message=message,
            code=code,
            details=details,
            cause=cause,
        )
class InvalidSearchTypeError(SearchError):
    """Invalid search type specified."""

    def __init__(
        self,
        search_type: str,
        valid_types: list[str] | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``INVALID_SEARCH_TYPE``.

        The message is generated from ``search_type``.

        Args:
            search_type: The rejected search type; stored as
                ``details["search_type"]``.
            valid_types: Accepted search types; stored as
                ``details["valid_types"]`` when non-empty.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "search_type": search_type}
        if valid_types:
            merged["valid_types"] = valid_types
        message = f"Invalid search type: {search_type}"
        super().__init__(message, ErrorCode.INVALID_SEARCH_TYPE, merged)
class SearchTimeoutError(SearchError):
    """Search operation timed out."""

    def __init__(
        self,
        timeout: float,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``SEARCH_TIMEOUT``.

        The message is generated from ``timeout`` and reports it in seconds.

        Args:
            timeout: The timeout that elapsed; stored as ``details["timeout"]``.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "timeout": timeout}
        message = f"Search timed out after {timeout} seconds"
        super().__init__(message, ErrorCode.SEARCH_TIMEOUT, merged)
# Collection Errors
class CollectionError(KnowledgeBaseError):
    """Common parent for every collection-related failure.

    Adds nothing to the base class; it exists so callers can catch
    collection errors as a group.
    """
class CollectionNotFoundError(CollectionError):
    """Collection does not exist."""

    def __init__(
        self,
        collection: str,
        project_id: str | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``COLLECTION_NOT_FOUND``.

        The message is generated from ``collection``.

        Args:
            collection: Name of the missing collection; stored as
                ``details["collection"]``.
            project_id: Project identifier; stored as
                ``details["project_id"]`` when non-empty.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "collection": collection}
        if project_id:
            merged["project_id"] = project_id
        message = f"Collection not found: {collection}"
        super().__init__(message, ErrorCode.COLLECTION_NOT_FOUND, merged)
# Document Errors
class DocumentError(KnowledgeBaseError):
    """Common parent for every document-related failure.

    Adds nothing to the base class; it exists so callers can catch
    document errors as a group.
    """
class DocumentNotFoundError(DocumentError):
    """Document does not exist."""

    def __init__(
        self,
        source_path: str,
        project_id: str | None = None,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``DOCUMENT_NOT_FOUND``.

        The message is generated from ``source_path``.

        Args:
            source_path: Path of the missing document; stored as
                ``details["source_path"]``.
            project_id: Project identifier; stored as
                ``details["project_id"]`` when non-empty.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "source_path": source_path}
        if project_id:
            merged["project_id"] = project_id
        message = f"Document not found: {source_path}"
        super().__init__(message, ErrorCode.DOCUMENT_NOT_FOUND, merged)
class InvalidDocumentError(DocumentError):
    """Raised when a document's content is invalid."""

    def __init__(
        self,
        message: str = "Invalid document content",
        details: dict[str, Any] | None = None,
        cause: Exception | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``INVALID_DOCUMENT``.

        Args:
            message: Human-readable error message.
            details: Additional error details.
            cause: Original exception that caused this error.
        """
        super().__init__(
            message=message,
            code=ErrorCode.INVALID_DOCUMENT,
            details=details,
            cause=cause,
        )
# Project Errors
class ProjectError(KnowledgeBaseError):
    """Common parent for every project-related failure.

    Adds nothing to the base class; it exists so callers can catch
    project errors as a group.
    """
class ProjectNotFoundError(ProjectError):
    """Project does not exist."""

    def __init__(
        self,
        project_id: str,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``PROJECT_NOT_FOUND``.

        The message is generated from ``project_id``.

        Args:
            project_id: Identifier of the missing project; stored as
                ``details["project_id"]``.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "project_id": project_id}
        message = f"Project not found: {project_id}"
        super().__init__(message, ErrorCode.PROJECT_NOT_FOUND, merged)
class ProjectAccessDeniedError(ProjectError):
    """Access to project is denied."""

    def __init__(
        self,
        project_id: str,
        details: dict[str, Any] | None = None,
    ) -> None:
        """
        Build the error with the fixed code ``PROJECT_ACCESS_DENIED``.

        The message is generated from ``project_id``.

        Args:
            project_id: Identifier of the project that was refused; stored
                as ``details["project_id"]``.
            details: Additional error details. The mapping is copied and
                never modified.
        """
        # Build a fresh dict so the caller's dict is not modified as a
        # side effect.
        merged: dict[str, Any] = {**(details or {}), "project_id": project_id}
        message = f"Access denied to project: {project_id}"
        super().__init__(message, ErrorCode.PROJECT_ACCESS_DENIED, merged)