forked from cardosofelipe/fast-next-template
feat(knowledge-base): implement Knowledge Base MCP Server (#57)
Implements RAG capabilities with pgvector for semantic search:
- Intelligent chunking strategies (code-aware, markdown-aware, text)
- Semantic search with vector similarity (HNSW index)
- Keyword search with PostgreSQL full-text search
- Hybrid search using Reciprocal Rank Fusion (RRF)
- Redis caching for embeddings
- Collection management (ingest, search, delete, stats)
- FastMCP tools: search_knowledge, ingest_content, delete_content, list_collections, get_collection_stats, update_document

Testing:
- 128 comprehensive tests covering all components
- 58% code coverage (database integration tests use mocks)
- Passes ruff linting and mypy type checking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
282
mcp-servers/knowledge-base/tests/conftest.py
Normal file
282
mcp-servers/knowledge-base/tests/conftest.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""
|
||||
Test fixtures for Knowledge Base MCP Server.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import UTC, datetime
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Put the directory above this tests/ folder at the front of sys.path so the
# fixtures below can import the server's modules by their bare names.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

# Configure the test environment up front; the fixtures import the server
# modules lazily, so these values are already in place when that happens.
os.environ.update(
    {
        "IS_TEST": "true",
        "KB_DATABASE_URL": "postgresql://test:test@localhost:5432/test",
        "KB_REDIS_URL": "redis://localhost:6379/0",
        "KB_LLM_GATEWAY_URL": "http://localhost:8001",
    }
)
|
||||
|
||||
|
||||
@pytest.fixture
def settings():
    """Return a ``Settings`` object populated with fixed test values.

    ``reset_settings()`` is called before the object is built (presumably to
    discard any previously cached settings -- confirm in ``config``).
    """
    from config import Settings, reset_settings

    reset_settings()

    # Where the server listens during tests.
    server_options = {"host": "127.0.0.1", "port": 8002, "debug": True}

    # Backing-service endpoints; same values as the KB_* environment variables.
    service_urls = {
        "database_url": "postgresql://test:test@localhost:5432/test",
        "redis_url": "redis://localhost:6379/0",
        "llm_gateway_url": "http://localhost:8001",
    }

    # Chunk size / overlap per content type.
    chunking_options = {
        "code_chunk_size": 500,
        "code_chunk_overlap": 50,
        "markdown_chunk_size": 800,
        "markdown_chunk_overlap": 100,
        "text_chunk_size": 400,
        "text_chunk_overlap": 50,
    }

    return Settings(
        **server_options,
        **service_urls,
        embedding_dimension=1536,
        **chunking_options,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def mock_database():
    """Return a ``MagicMock`` specced on ``DatabaseManager`` with async stubs.

    Every database method listed below is replaced by an ``AsyncMock``; most
    carry a canned return value that tests can rely on or override.
    """
    from database import DatabaseManager

    db = MagicMock(spec=DatabaseManager)
    db._pool = MagicMock()
    db.acquire = MagicMock(return_value=AsyncMock())

    # Coroutine stubs with no explicit return value configured.
    for method_name in ("initialize", "close", "get_collection_stats"):
        setattr(db, method_name, AsyncMock())

    # Coroutine stubs paired with the value each one resolves to.
    canned_results = {
        "store_embedding": "test-id-123",
        "store_embeddings_batch": ["id-1", "id-2"],
        "semantic_search": [],
        "keyword_search": [],
        "delete_by_source": 1,
        "delete_collection": 5,
        "delete_by_ids": 2,
        "list_collections": [],
        "cleanup_expired": 0,
    }
    for method_name, result in canned_results.items():
        setattr(db, method_name, AsyncMock(return_value=result))

    return db
|
||||
|
||||
|
||||
@pytest.fixture
def mock_embeddings():
    """Return a ``MagicMock`` specced on ``EmbeddingGenerator``.

    ``generate`` resolves to one fixed 1536-element vector of ``0.1`` values;
    ``generate_batch`` resolves to one such vector per input text.
    """
    from embeddings import EmbeddingGenerator

    def _vector() -> list[float]:
        # Constant fake embedding, 1536 dimensions.
        return [0.1] * 1536

    def _vectors_for(texts, **_kwargs):
        # One fresh vector per text; extra keyword arguments are ignored.
        return [_vector() for _ in texts]

    generator = MagicMock(spec=EmbeddingGenerator)
    generator.initialize = AsyncMock()
    generator.close = AsyncMock()
    generator.generate = AsyncMock(return_value=_vector())
    generator.generate_batch = AsyncMock(side_effect=_vectors_for)

    return generator
|
||||
|
||||
|
||||
@pytest.fixture
def mock_redis():
    """Return a new ``fakeredis.aioredis.FakeRedis`` instance as the Redis client."""
    from fakeredis.aioredis import FakeRedis

    return FakeRedis()
|
||||
|
||||
|
||||
@pytest.fixture
def sample_python_code():
    """Return a small Python module as text, used as input for chunking tests.

    The sample has a module docstring, imports, a class with three methods,
    a plain function and an async function.
    """
    source = '''"""Sample module for testing."""

import os
from typing import Any


class Calculator:
    """A simple calculator class."""

    def __init__(self, initial: int = 0) -> None:
        """Initialize calculator."""
        self.value = initial

    def add(self, x: int) -> int:
        """Add a value."""
        self.value += x
        return self.value

    def subtract(self, x: int) -> int:
        """Subtract a value."""
        self.value -= x
        return self.value


def helper_function(data: dict[str, Any]) -> str:
    """A helper function."""
    return str(data)


async def async_function() -> None:
    """An async function."""
    pass
'''
    return source
|
||||
|
||||
|
||||
@pytest.fixture
def sample_markdown():
    """Return a Markdown document as text, used as input for chunking tests.

    The sample has nested headings, ordered and unordered lists, and two
    fenced code blocks (Python and JSON).
    """
    document = '''# Project Documentation

This is the main documentation for our project.

## Getting Started

To get started, follow these steps:

1. Install dependencies
2. Configure settings
3. Run the application

### Prerequisites

You'll need the following installed:

- Python 3.12+
- PostgreSQL
- Redis

```python
# Example code
def main():
    print("Hello, World!")
```

## API Reference

### Search Endpoint

The search endpoint allows you to query the knowledge base.

**Endpoint:** `POST /api/search`

**Request:**
```json
{
    "query": "your search query",
    "limit": 10
}
```

## Contributing

We welcome contributions! Please see our contributing guide.
'''
    return document
|
||||
|
||||
|
||||
@pytest.fixture
def sample_text():
    """Return four plain-text paragraphs, used as input for chunking tests.

    Paragraphs are separated by one blank line and the text ends with a
    single trailing newline.
    """
    paragraphs = (
        "The quick brown fox jumps over the lazy dog. "
        "This is a sample text that we use for testing the text chunking functionality. "
        "It contains multiple sentences that should be properly split into chunks.",
        "Each paragraph represents a logical unit of text. "
        "The chunker should try to respect paragraph boundaries when possible. "
        "This helps maintain context and readability.",
        "When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. "
        "This ensures that each chunk contains complete thoughts and is useful for retrieval.",
        "The final paragraph tests edge cases. "
        "What happens with short paragraphs? "
        "Do they get merged with adjacent content? "
        "Let's find out!",
    )
    return "\n\n".join(paragraphs) + "\n"
|
||||
|
||||
|
||||
@pytest.fixture
def sample_chunk():
    """Return a ``Chunk`` describing a two-line Python function at /test/hello.py."""
    from models import Chunk, ChunkType, FileType

    chunk_fields = {
        "content": "def hello():\n print('Hello')",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "source_path": "/test/hello.py",
        "start_line": 1,
        "end_line": 2,
        "metadata": {"function": "hello"},
        "token_count": 15,
    }
    return Chunk(**chunk_fields)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_embedding():
    """Return a ``KnowledgeEmbedding`` for a two-line Python code chunk.

    The record uses project ``proj-123``, collection ``default`` and a
    constant 1536-element embedding of ``0.1`` values. ``created_at`` and
    ``updated_at`` hold the same timezone-aware UTC timestamp.
    """
    from models import ChunkType, FileType, KnowledgeEmbedding

    # Read the clock once: two separate now() calls would leave the two
    # timestamps differing by a nondeterministic few microseconds.
    timestamp = datetime.now(UTC)

    return KnowledgeEmbedding(
        id="test-id-123",
        project_id="proj-123",
        collection="default",
        content="def hello():\n print('Hello')",
        embedding=[0.1] * 1536,
        chunk_type=ChunkType.CODE,
        source_path="/test/hello.py",
        start_line=1,
        end_line=2,
        file_type=FileType.PYTHON,
        metadata={"function": "hello"},
        content_hash="abc123",
        created_at=timestamp,
        updated_at=timestamp,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def sample_ingest_request():
    """Return an ``IngestRequest`` that submits a small Python snippet.

    The request targets project ``proj-123`` / agent ``agent-456`` and the
    ``default`` collection.
    """
    from models import ChunkType, FileType, IngestRequest

    payload = dict(
        project_id="proj-123",
        agent_id="agent-456",
        content="def hello():\n print('Hello')",
        source_path="/test/hello.py",
        collection="default",
        chunk_type=ChunkType.CODE,
        file_type=FileType.PYTHON,
        metadata={"test": True},
    )
    return IngestRequest(**payload)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_search_request():
    """Return a hybrid ``SearchRequest`` for the query "hello function".

    The request is scoped to project ``proj-123`` / agent ``agent-456`` and
    the ``default`` collection, with a limit of 10 and a threshold of 0.7.
    """
    from models import SearchRequest, SearchType

    query_options = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "query": "hello function",
        "search_type": SearchType.HYBRID,
        "collection": "default",
        "limit": 10,
        "threshold": 0.7,
    }
    return SearchRequest(**query_options)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_delete_request():
    """Return a ``DeleteRequest`` that targets /test/hello.py by source path."""
    from models import DeleteRequest

    request = DeleteRequest(
        project_id="proj-123",
        agent_id="agent-456",
        source_path="/test/hello.py",
    )
    return request
|
||||
Reference in New Issue
Block a user