forked from cardosofelipe/fast-next-template
feat(knowledge-base): implement Knowledge Base MCP Server (#57)
Implements RAG capabilities with pgvector for semantic search: - Intelligent chunking strategies (code-aware, markdown-aware, text) - Semantic search with vector similarity (HNSW index) - Keyword search with PostgreSQL full-text search - Hybrid search using Reciprocal Rank Fusion (RRF) - Redis caching for embeddings - Collection management (ingest, search, delete, stats) - FastMCP tools: search_knowledge, ingest_content, delete_content, list_collections, get_collection_stats, update_document Testing: - 128 comprehensive tests covering all components - 58% code coverage (database integration tests use mocks) - Passes ruff linting and mypy type checking 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
240
mcp-servers/knowledge-base/tests/test_collection_manager.py
Normal file
240
mcp-servers/knowledge-base/tests/test_collection_manager.py
Normal file
@@ -0,0 +1,240 @@
|
||||
"""Tests for collection manager module."""
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestCollectionManager:
|
||||
"""Tests for CollectionManager class."""
|
||||
|
||||
@pytest.fixture
|
||||
def collection_manager(self, settings, mock_database, mock_embeddings):
|
||||
"""Create collection manager with mocks."""
|
||||
from chunking.base import ChunkerFactory
|
||||
from collection_manager import CollectionManager
|
||||
|
||||
mock_chunker_factory = MagicMock(spec=ChunkerFactory)
|
||||
|
||||
# Mock chunk_content to return chunks
|
||||
from models import Chunk, ChunkType
|
||||
|
||||
mock_chunker_factory.chunk_content.return_value = [
|
||||
Chunk(
|
||||
content="def hello(): pass",
|
||||
chunk_type=ChunkType.CODE,
|
||||
token_count=10,
|
||||
)
|
||||
]
|
||||
|
||||
return CollectionManager(
|
||||
settings=settings,
|
||||
database=mock_database,
|
||||
embeddings=mock_embeddings,
|
||||
chunker_factory=mock_chunker_factory,
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ingest_content(self, collection_manager, sample_ingest_request):
|
||||
"""Test content ingestion."""
|
||||
result = await collection_manager.ingest(sample_ingest_request)
|
||||
|
||||
assert result.success is True
|
||||
assert result.chunks_created == 1
|
||||
assert result.embeddings_generated == 1
|
||||
assert len(result.chunk_ids) == 1
|
||||
assert result.collection == "default"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ingest_empty_content(self, collection_manager):
|
||||
"""Test ingesting empty content."""
|
||||
from models import IngestRequest
|
||||
|
||||
# Mock chunker to return empty list
|
||||
collection_manager._chunker_factory.chunk_content.return_value = []
|
||||
|
||||
request = IngestRequest(
|
||||
project_id="proj-123",
|
||||
agent_id="agent-456",
|
||||
content="",
|
||||
)
|
||||
|
||||
result = await collection_manager.ingest(request)
|
||||
|
||||
assert result.success is True
|
||||
assert result.chunks_created == 0
|
||||
assert result.embeddings_generated == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ingest_error_handling(self, collection_manager, sample_ingest_request):
|
||||
"""Test ingest error handling."""
|
||||
# Make embedding generation fail
|
||||
collection_manager._embeddings.generate_batch.side_effect = Exception("Embedding error")
|
||||
|
||||
result = await collection_manager.ingest(sample_ingest_request)
|
||||
|
||||
assert result.success is False
|
||||
assert "Embedding error" in result.error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_by_source(self, collection_manager, sample_delete_request):
|
||||
"""Test deletion by source path."""
|
||||
result = await collection_manager.delete(sample_delete_request)
|
||||
|
||||
assert result.success is True
|
||||
assert result.chunks_deleted == 1 # Mock returns 1
|
||||
collection_manager._database.delete_by_source.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_by_collection(self, collection_manager):
|
||||
"""Test deletion by collection."""
|
||||
from models import DeleteRequest
|
||||
|
||||
request = DeleteRequest(
|
||||
project_id="proj-123",
|
||||
agent_id="agent-456",
|
||||
collection="to-delete",
|
||||
)
|
||||
|
||||
result = await collection_manager.delete(request)
|
||||
|
||||
assert result.success is True
|
||||
collection_manager._database.delete_collection.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_by_ids(self, collection_manager):
|
||||
"""Test deletion by chunk IDs."""
|
||||
from models import DeleteRequest
|
||||
|
||||
request = DeleteRequest(
|
||||
project_id="proj-123",
|
||||
agent_id="agent-456",
|
||||
chunk_ids=["id-1", "id-2"],
|
||||
)
|
||||
|
||||
result = await collection_manager.delete(request)
|
||||
|
||||
assert result.success is True
|
||||
collection_manager._database.delete_by_ids.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_no_target(self, collection_manager):
|
||||
"""Test deletion with no target specified."""
|
||||
from models import DeleteRequest
|
||||
|
||||
request = DeleteRequest(
|
||||
project_id="proj-123",
|
||||
agent_id="agent-456",
|
||||
)
|
||||
|
||||
result = await collection_manager.delete(request)
|
||||
|
||||
assert result.success is False
|
||||
assert "Must specify" in result.error
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_collections(self, collection_manager):
|
||||
"""Test listing collections."""
|
||||
from models import CollectionInfo
|
||||
|
||||
collection_manager._database.list_collections.return_value = [
|
||||
CollectionInfo(
|
||||
name="collection-1",
|
||||
project_id="proj-123",
|
||||
chunk_count=100,
|
||||
total_tokens=50000,
|
||||
file_types=["python"],
|
||||
created_at=datetime.now(UTC),
|
||||
updated_at=datetime.now(UTC),
|
||||
),
|
||||
CollectionInfo(
|
||||
name="collection-2",
|
||||
project_id="proj-123",
|
||||
chunk_count=50,
|
||||
total_tokens=25000,
|
||||
file_types=["javascript"],
|
||||
created_at=datetime.now(UTC),
|
||||
updated_at=datetime.now(UTC),
|
||||
),
|
||||
]
|
||||
|
||||
result = await collection_manager.list_collections("proj-123")
|
||||
|
||||
assert result.project_id == "proj-123"
|
||||
assert result.total_collections == 2
|
||||
assert len(result.collections) == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_collection_stats(self, collection_manager):
|
||||
"""Test getting collection statistics."""
|
||||
from models import CollectionStats
|
||||
|
||||
expected_stats = CollectionStats(
|
||||
collection="test-collection",
|
||||
project_id="proj-123",
|
||||
chunk_count=100,
|
||||
unique_sources=10,
|
||||
total_tokens=50000,
|
||||
avg_chunk_size=500.0,
|
||||
chunk_types={"code": 60, "text": 40},
|
||||
file_types={"python": 50, "javascript": 10},
|
||||
)
|
||||
collection_manager._database.get_collection_stats.return_value = expected_stats
|
||||
|
||||
stats = await collection_manager.get_collection_stats("proj-123", "test-collection")
|
||||
|
||||
assert stats.chunk_count == 100
|
||||
assert stats.unique_sources == 10
|
||||
collection_manager._database.get_collection_stats.assert_called_once_with(
|
||||
"proj-123", "test-collection"
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_document(self, collection_manager):
|
||||
"""Test updating a document."""
|
||||
result = await collection_manager.update_document(
|
||||
project_id="proj-123",
|
||||
agent_id="agent-456",
|
||||
source_path="/test/file.py",
|
||||
content="def updated(): pass",
|
||||
collection="default",
|
||||
)
|
||||
|
||||
# Should delete first, then ingest
|
||||
collection_manager._database.delete_by_source.assert_called_once()
|
||||
assert result.success is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cleanup_expired(self, collection_manager):
|
||||
"""Test cleaning up expired embeddings."""
|
||||
collection_manager._database.cleanup_expired.return_value = 10
|
||||
|
||||
count = await collection_manager.cleanup_expired()
|
||||
|
||||
assert count == 10
|
||||
collection_manager._database.cleanup_expired.assert_called_once()
|
||||
|
||||
|
||||
class TestGlobalCollectionManager:
|
||||
"""Tests for global collection manager."""
|
||||
|
||||
def test_get_collection_manager_singleton(self):
|
||||
"""Test that get_collection_manager returns singleton."""
|
||||
from collection_manager import get_collection_manager, reset_collection_manager
|
||||
|
||||
reset_collection_manager()
|
||||
manager1 = get_collection_manager()
|
||||
manager2 = get_collection_manager()
|
||||
|
||||
assert manager1 is manager2
|
||||
|
||||
def test_reset_collection_manager(self):
|
||||
"""Test resetting collection manager."""
|
||||
from collection_manager import get_collection_manager, reset_collection_manager
|
||||
|
||||
manager1 = get_collection_manager()
|
||||
reset_collection_manager()
|
||||
manager2 = get_collection_manager()
|
||||
|
||||
assert manager1 is not manager2
|
||||
Reference in New Issue
Block a user