Files
syndarix/mcp-servers/knowledge-base/tests/test_models.py
Felipe Cardoso d0fc7f37ff feat(knowledge-base): implement Knowledge Base MCP Server (#57)
Implements RAG capabilities with pgvector for semantic search:

- Intelligent chunking strategies (code-aware, markdown-aware, text)
- Semantic search with vector similarity (HNSW index)
- Keyword search with PostgreSQL full-text search
- Hybrid search using Reciprocal Rank Fusion (RRF)
- Redis caching for embeddings
- Collection management (ingest, search, delete, stats)
- FastMCP tools: search_knowledge, ingest_content, delete_content,
  list_collections, get_collection_stats, update_document

Testing:
- 128 comprehensive tests covering all components
- 58% code coverage (database integration tests use mocks)
- Passes ruff linting and mypy type checking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 21:33:26 +01:00

348 lines
11 KiB
Python

"""Tests for data models."""
from datetime import UTC, datetime
class TestEnums:
    """Check the string values carried by the enums in ``models``."""

    def test_search_type_values(self):
        """Each SearchType member maps to its expected string value."""
        from models import SearchType

        expected_pairs = (
            (SearchType.SEMANTIC, "semantic"),
            (SearchType.KEYWORD, "keyword"),
            (SearchType.HYBRID, "hybrid"),
        )
        for member, expected in expected_pairs:
            assert member.value == expected

    def test_chunk_type_values(self):
        """Each ChunkType member maps to its expected string value."""
        from models import ChunkType

        expected_pairs = (
            (ChunkType.CODE, "code"),
            (ChunkType.MARKDOWN, "markdown"),
            (ChunkType.TEXT, "text"),
            (ChunkType.DOCUMENTATION, "documentation"),
        )
        for member, expected in expected_pairs:
            assert member.value == expected

    def test_file_type_values(self):
        """Each checked FileType member maps to its expected string value."""
        from models import FileType

        expected_pairs = (
            (FileType.PYTHON, "python"),
            (FileType.JAVASCRIPT, "javascript"),
            (FileType.TYPESCRIPT, "typescript"),
            (FileType.MARKDOWN, "markdown"),
        )
        for member, expected in expected_pairs:
            assert member.value == expected
class TestFileExtensionMap:
    """Check that FILE_EXTENSION_MAP resolves extensions to FileType members."""

    def test_python_extensions(self):
        """The ``.py`` extension resolves to FileType.PYTHON."""
        from models import FILE_EXTENSION_MAP, FileType

        resolved = FILE_EXTENSION_MAP[".py"]
        assert resolved == FileType.PYTHON

    def test_javascript_extensions(self):
        """Both ``.js`` and ``.jsx`` resolve to FileType.JAVASCRIPT."""
        from models import FILE_EXTENSION_MAP, FileType

        for extension in (".js", ".jsx"):
            assert FILE_EXTENSION_MAP[extension] == FileType.JAVASCRIPT

    def test_typescript_extensions(self):
        """Both ``.ts`` and ``.tsx`` resolve to FileType.TYPESCRIPT."""
        from models import FILE_EXTENSION_MAP, FileType

        for extension in (".ts", ".tsx"):
            assert FILE_EXTENSION_MAP[extension] == FileType.TYPESCRIPT

    def test_markdown_extensions(self):
        """Both ``.md`` and ``.mdx`` resolve to FileType.MARKDOWN."""
        from models import FILE_EXTENSION_MAP, FileType

        for extension in (".md", ".mdx"):
            assert FILE_EXTENSION_MAP[extension] == FileType.MARKDOWN
class TestChunk:
    """Exercise the Chunk object supplied by the ``sample_chunk`` fixture."""

    def test_chunk_creation(self, sample_chunk):
        """The injected chunk exposes the expected attribute values."""
        from models import ChunkType, FileType

        expected_attrs = (
            ("content", "def hello():\n print('Hello')"),
            ("chunk_type", ChunkType.CODE),
            ("file_type", FileType.PYTHON),
            ("source_path", "/test/hello.py"),
            ("start_line", 1),
            ("end_line", 2),
            ("token_count", 15),
        )
        # Walk the table in order so attributes are checked in a fixed sequence.
        for attr_name, expected in expected_attrs:
            assert getattr(sample_chunk, attr_name) == expected

    def test_chunk_to_dict(self, sample_chunk):
        """``to_dict`` returns plain values, with enum fields as strings."""
        as_dict = sample_chunk.to_dict()

        expected_items = (
            ("content", "def hello():\n print('Hello')"),
            ("chunk_type", "code"),
            ("file_type", "python"),
            ("source_path", "/test/hello.py"),
            ("start_line", 1),
            ("end_line", 2),
            ("token_count", 15),
        )
        # Only these keys are checked; any additional keys are ignored.
        for key, expected in expected_items:
            assert as_dict[key] == expected
class TestKnowledgeEmbedding:
    """Exercise the object supplied by the ``sample_embedding`` fixture."""

    def test_embedding_creation(self, sample_embedding):
        """The injected embedding has the expected ids, collection and size."""
        emb = sample_embedding

        assert emb.id == "test-id-123"
        assert emb.project_id == "proj-123"
        assert emb.collection == "default"

        vector_length = len(emb.embedding)
        assert vector_length == 1536

    def test_embedding_to_dict(self, sample_embedding):
        """``to_dict`` exposes the metadata fields but not the vector."""
        as_dict = sample_embedding.to_dict()

        expected_items = (
            ("id", "test-id-123"),
            ("project_id", "proj-123"),
            ("collection", "default"),
            ("chunk_type", "code"),
            ("file_type", "python"),
        )
        for key, expected in expected_items:
            assert as_dict[key] == expected

        # The embedding vector is excluded from the dict for size.
        assert "embedding" not in as_dict
class TestIngestRequest:
    """Exercise construction and default values of IngestRequest."""

    def test_ingest_request_creation(self, sample_ingest_request):
        """The injected request carries the expected field values."""
        from models import ChunkType, FileType

        req = sample_ingest_request

        assert req.project_id == "proj-123"
        assert req.agent_id == "agent-456"
        assert req.chunk_type == ChunkType.CODE
        assert req.file_type == FileType.PYTHON
        assert req.collection == "default"

    def test_ingest_request_defaults(self):
        """Fields omitted at construction take their default values."""
        from models import ChunkType, IngestRequest

        # Supply only the three arguments this test passes; the rest default.
        minimal_fields = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "content": "test content",
        }
        req = IngestRequest(**minimal_fields)

        assert req.collection == "default"
        assert req.chunk_type == ChunkType.TEXT
        assert req.file_type is None
        assert req.metadata == {}
class TestIngestResult:
    """Exercise IngestResult for a successful and a failed ingest."""

    def test_successful_result(self):
        """A success result keeps its counts and has no error set."""
        from models import IngestResult

        fields = {
            "success": True,
            "chunks_created": 5,
            "embeddings_generated": 5,
            "source_path": "/test/file.py",
            "collection": "default",
            "chunk_ids": ["id1", "id2", "id3", "id4", "id5"],
        }
        outcome = IngestResult(**fields)

        assert outcome.success is True
        assert outcome.chunks_created == 5
        # No ``error`` was passed, so it is expected to be None.
        assert outcome.error is None

    def test_failed_result(self):
        """A failure result reports ``success`` False and keeps the message."""
        from models import IngestResult

        fields = {
            "success": False,
            "chunks_created": 0,
            "embeddings_generated": 0,
            "collection": "default",
            "chunk_ids": [],
            "error": "Something went wrong",
        }
        outcome = IngestResult(**fields)

        assert outcome.success is False
        assert outcome.error == "Something went wrong"
class TestSearchRequest:
    """Exercise construction and default values of SearchRequest."""

    def test_search_request_creation(self, sample_search_request):
        """The injected request carries the expected field values."""
        from models import SearchType

        req = sample_search_request

        assert req.project_id == "proj-123"
        assert req.query == "hello function"
        assert req.search_type == SearchType.HYBRID
        assert req.limit == 10
        assert req.threshold == 0.7

    def test_search_request_defaults(self):
        """Fields omitted at construction take their default values."""
        from models import SearchRequest, SearchType

        # Supply only the three arguments this test passes; the rest default.
        minimal_fields = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "query": "test query",
        }
        req = SearchRequest(**minimal_fields)

        assert req.search_type == SearchType.HYBRID
        assert req.collection is None
        assert req.limit == 10
        assert req.threshold == 0.7
        assert req.file_types is None
class TestSearchResult:
    """Exercise the ``SearchResult.from_embedding`` constructor."""

    def test_from_embedding(self, sample_embedding):
        """Building from an embedding and a score copies the expected fields."""
        from models import SearchResult

        score = 0.95
        hit = SearchResult.from_embedding(sample_embedding, score)

        assert hit.id == "test-id-123"
        assert hit.content == "def hello():\n print('Hello')"
        assert hit.score == 0.95
        assert hit.source_path == "/test/hello.py"
        # Type fields are compared against plain strings here.
        assert hit.chunk_type == "code"
        assert hit.file_type == "python"
class TestSearchResponse:
    """Exercise construction of SearchResponse from SearchResult items."""

    def test_search_response(self):
        """A response built from two results keeps its query, results and timing."""
        from models import SearchResponse, SearchResult

        first_hit = SearchResult(
            id="id1",
            content="test content 1",
            score=0.95,
            chunk_type="code",
            collection="default",
        )
        second_hit = SearchResult(
            id="id2",
            content="test content 2",
            score=0.85,
            chunk_type="text",
            collection="default",
        )
        hits = [first_hit, second_hit]

        reply = SearchResponse(
            query="test query",
            search_type="hybrid",
            results=hits,
            total_results=2,
            search_time_ms=15.5,
        )

        assert reply.query == "test query"
        assert len(reply.results) == 2
        assert reply.search_time_ms == 15.5
class TestDeleteRequest:
    """Exercise DeleteRequest targeted by source path, collection, or chunk IDs."""

    def test_delete_by_source(self, sample_delete_request):
        """The injected request targets a source path and leaves other selectors unset."""
        req = sample_delete_request

        assert req.project_id == "proj-123"
        assert req.source_path == "/test/hello.py"
        assert req.collection is None
        assert req.chunk_ids is None

    def test_delete_by_collection(self):
        """A request built with only a collection leaves ``source_path`` as None."""
        from models import DeleteRequest

        fields = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "collection": "to-delete",
        }
        req = DeleteRequest(**fields)

        assert req.collection == "to-delete"
        assert req.source_path is None

    def test_delete_by_ids(self):
        """A request built with three chunk IDs reports three IDs."""
        from models import DeleteRequest

        fields = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "chunk_ids": ["id1", "id2", "id3"],
        }
        req = DeleteRequest(**fields)

        id_count = len(req.chunk_ids)
        assert id_count == 3
class TestCollectionInfo:
    """Exercise construction of CollectionInfo."""

    def test_collection_info(self):
        """A CollectionInfo keeps the name, chunk count and file types it was given."""
        from models import CollectionInfo

        # Timestamps are taken separately, in the same order as the keyword
        # arguments below, using timezone-aware UTC datetimes.
        fields = {
            "name": "test-collection",
            "project_id": "proj-123",
            "chunk_count": 100,
            "total_tokens": 50000,
            "file_types": ["python", "javascript"],
            "created_at": datetime.now(UTC),
            "updated_at": datetime.now(UTC),
        }
        summary = CollectionInfo(**fields)

        assert summary.name == "test-collection"
        assert summary.chunk_count == 100
        assert len(summary.file_types) == 2
class TestCollectionStats:
    """Exercise construction of CollectionStats."""

    def test_collection_stats(self):
        """A CollectionStats keeps the counts and per-type breakdown it was given."""
        from models import CollectionStats

        # Timestamps are taken separately, in the same order as the keyword
        # arguments below, using timezone-aware UTC datetimes.
        fields = {
            "collection": "test-collection",
            "project_id": "proj-123",
            "chunk_count": 100,
            "unique_sources": 10,
            "total_tokens": 50000,
            "avg_chunk_size": 500.0,
            "chunk_types": {"code": 60, "text": 40},
            "file_types": {"python": 50, "javascript": 10},
            "oldest_chunk": datetime.now(UTC),
            "newest_chunk": datetime.now(UTC),
        }
        summary = CollectionStats(**fields)

        assert summary.chunk_count == 100
        assert summary.unique_sources == 10
        assert summary.chunk_types["code"] == 60