Files
syndarix/mcp-servers/knowledge-base/config.py
Felipe Cardoso 953af52d0e fix(mcp-kb): address critical issues from deep review
- Fix SQL HAVING clause bug by using CTE approach (closes #73)
- Add /mcp JSON-RPC 2.0 endpoint for tool execution (closes #74)
- Add /mcp/tools endpoint for tool discovery (closes #75)
- Add content size limits to prevent DoS attacks (closes #78)
- Add comprehensive tests for new endpoints

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 01:03:58 +01:00

153 lines
4.3 KiB
Python

"""
Configuration for Knowledge Base MCP Server.
Uses pydantic-settings for environment variable loading.
"""
import os
from pydantic import Field
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings for the Knowledge Base MCP server.

    Values are loaded by pydantic-settings from environment variables that
    carry the ``KB_`` prefix and from a ``.env`` file; unrecognised entries
    are ignored (see ``model_config`` at the bottom of the class).

    Numeric fields declare bounds so that a misconfigured value is rejected
    with a validation error when the settings are built, instead of
    surfacing later as a hard-to-trace runtime failure.
    """

    # Server settings
    host: str = Field(default="0.0.0.0", description="Server host")
    # Bounded to the valid TCP port range.
    port: int = Field(default=8002, ge=0, le=65535, description="Server port")
    debug: bool = Field(default=False, description="Debug mode")

    # Database settings
    database_url: str = Field(
        default="postgresql://postgres:postgres@localhost:5432/syndarix",
        description="PostgreSQL connection URL with pgvector extension",
    )
    # No bounds on the pool fields: what zero or negative values mean depends
    # on the pool implementation, which is not visible from this module.
    database_pool_size: int = Field(default=10, description="Connection pool size")
    database_pool_max_overflow: int = Field(
        default=20, description="Max overflow connections"
    )

    # Redis settings
    redis_url: str = Field(
        default="redis://localhost:6379/0",
        description="Redis connection URL",
    )

    # LLM Gateway settings (for embeddings)
    llm_gateway_url: str = Field(
        default="http://localhost:8001",
        description="LLM Gateway MCP server URL",
    )

    # Embedding settings
    embedding_model: str = Field(
        default="text-embedding-3-large",
        description="Default embedding model",
    )
    # NOTE(review): text-embedding-3-large natively produces 3072-dimensional
    # vectors; 1536 presumably relies on the gateway requesting reduced
    # dimensions -- confirm against the gateway and the vector column size.
    embedding_dimension: int = Field(
        default=1536,
        gt=0,
        description="Embedding vector dimension",
    )
    embedding_batch_size: int = Field(
        default=100,
        gt=0,
        description="Max texts per embedding batch",
    )
    embedding_cache_ttl: int = Field(
        default=86400,
        ge=0,
        description="Embedding cache TTL in seconds (24 hours)",
    )

    # Chunking settings: sizes must be positive, overlaps must not be negative.
    code_chunk_size: int = Field(
        default=500,
        gt=0,
        description="Target tokens per code chunk",
    )
    code_chunk_overlap: int = Field(
        default=50,
        ge=0,
        description="Token overlap between code chunks",
    )
    markdown_chunk_size: int = Field(
        default=800,
        gt=0,
        description="Target tokens per markdown chunk",
    )
    markdown_chunk_overlap: int = Field(
        default=100,
        ge=0,
        description="Token overlap between markdown chunks",
    )
    text_chunk_size: int = Field(
        default=400,
        gt=0,
        description="Target tokens per text chunk",
    )
    text_chunk_overlap: int = Field(
        default=50,
        ge=0,
        description="Token overlap between text chunks",
    )

    # Search settings
    search_default_limit: int = Field(
        default=10,
        gt=0,
        description="Default number of search results",
    )
    search_max_limit: int = Field(
        default=100,
        gt=0,
        description="Maximum number of search results",
    )
    semantic_threshold: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score for semantic search",
    )
    # Weights only need to be non-negative; no upper bound is imposed so that
    # un-normalised weightings remain valid.
    hybrid_semantic_weight: float = Field(
        default=0.7,
        ge=0.0,
        description="Weight for semantic results in hybrid search",
    )
    hybrid_keyword_weight: float = Field(
        default=0.3,
        ge=0.0,
        description="Weight for keyword results in hybrid search",
    )

    # Storage settings
    # Zero is allowed: the description defines it as "no expiry".
    embedding_ttl_days: int = Field(
        default=30,
        ge=0,
        description="TTL for embedding records in days (0 = no expiry)",
    )

    # Content size limits (DoS prevention); a non-positive limit is rejected.
    max_document_size: int = Field(
        default=10 * 1024 * 1024,  # 10 MB
        gt=0,
        description="Maximum size of a single document in bytes",
    )
    max_batch_size: int = Field(
        default=100,
        gt=0,
        description="Maximum number of documents in a batch operation",
    )
    max_batch_total_size: int = Field(
        default=50 * 1024 * 1024,  # 50 MB
        gt=0,
        description="Maximum total size of all documents in a batch",
    )

    model_config = {"env_prefix": "KB_", "env_file": ".env", "extra": "ignore"}
# Module-level cache for the shared Settings instance. It stays None until
# get_settings() builds the instance on first use, and reset_settings() sets
# it back to None.
_settings: Settings | None = None
def get_settings() -> Settings:
    """Return the shared Settings instance, creating it on the first call.

    The instance is stored in the module-level ``_settings`` variable, so
    later calls return the same object until that variable is cleared.
    """
    global _settings
    if _settings is not None:
        # Already built by an earlier call: reuse it.
        return _settings
    _settings = Settings()
    return _settings
def reset_settings() -> None:
    """Clear the cached settings instance (intended for tests).

    Rebinds the module-level ``_settings`` variable to ``None``.
    """
    global _settings
    _settings = None
def is_test_mode() -> bool:
    """Report whether the ``IS_TEST`` environment variable enables test mode.

    The value is compared case-insensitively against ``true``, ``1`` and
    ``yes``; an unset variable or any other value yields ``False``.
    """
    raw_flag = os.environ.get("IS_TEST", "")
    return raw_flag.lower() in {"true", "1", "yes"}