forked from cardosofelipe/fast-next-template
Implements RAG capabilities with pgvector for semantic search:
- Intelligent chunking strategies (code-aware, markdown-aware, text)
- Semantic search with vector similarity (HNSW index)
- Keyword search with PostgreSQL full-text search
- Hybrid search using Reciprocal Rank Fusion (RRF)
- Redis caching for embeddings
- Collection management (ingest, search, delete, stats)
- FastMCP tools: search_knowledge, ingest_content, delete_content, list_collections, get_collection_stats, update_document

Testing:
- 128 comprehensive tests covering all components
- 58% code coverage (database integration tests use mocks)
- Passes ruff linting and mypy type checking

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
348 lines
11 KiB
Python
348 lines
11 KiB
Python
"""Tests for data models."""
|
|
|
|
from datetime import UTC, datetime
|
|
|
|
|
|
class TestEnums:
    """Checks the string values behind the model enum members."""

    def test_search_type_values(self):
        """Each checked SearchType member exposes the expected ``.value``."""
        from models import SearchType

        pairs = [
            (SearchType.SEMANTIC, "semantic"),
            (SearchType.KEYWORD, "keyword"),
            (SearchType.HYBRID, "hybrid"),
        ]
        for member, expected in pairs:
            assert member.value == expected

    def test_chunk_type_values(self):
        """Each checked ChunkType member exposes the expected ``.value``."""
        from models import ChunkType

        pairs = [
            (ChunkType.CODE, "code"),
            (ChunkType.MARKDOWN, "markdown"),
            (ChunkType.TEXT, "text"),
            (ChunkType.DOCUMENTATION, "documentation"),
        ]
        for member, expected in pairs:
            assert member.value == expected

    def test_file_type_values(self):
        """Each checked FileType member exposes the expected ``.value``."""
        from models import FileType

        pairs = [
            (FileType.PYTHON, "python"),
            (FileType.JAVASCRIPT, "javascript"),
            (FileType.TYPESCRIPT, "typescript"),
            (FileType.MARKDOWN, "markdown"),
        ]
        for member, expected in pairs:
            assert member.value == expected
|
|
|
|
|
|
class TestFileExtensionMap:
    """Checks FILE_EXTENSION_MAP lookups for a handful of extensions."""

    def test_python_extensions(self):
        """``.py`` resolves to FileType.PYTHON."""
        from models import FILE_EXTENSION_MAP, FileType

        resolved = FILE_EXTENSION_MAP[".py"]
        assert resolved == FileType.PYTHON

    def test_javascript_extensions(self):
        """``.js`` and ``.jsx`` both resolve to FileType.JAVASCRIPT."""
        from models import FILE_EXTENSION_MAP, FileType

        for suffix in (".js", ".jsx"):
            assert FILE_EXTENSION_MAP[suffix] == FileType.JAVASCRIPT

    def test_typescript_extensions(self):
        """``.ts`` and ``.tsx`` both resolve to FileType.TYPESCRIPT."""
        from models import FILE_EXTENSION_MAP, FileType

        for suffix in (".ts", ".tsx"):
            assert FILE_EXTENSION_MAP[suffix] == FileType.TYPESCRIPT

    def test_markdown_extensions(self):
        """``.md`` and ``.mdx`` both resolve to FileType.MARKDOWN."""
        from models import FILE_EXTENSION_MAP, FileType

        for suffix in (".md", ".mdx"):
            assert FILE_EXTENSION_MAP[suffix] == FileType.MARKDOWN
|
|
|
|
|
|
class TestChunk:
    """Checks the ``sample_chunk`` fixture's attributes and its ``to_dict`` output."""

    def test_chunk_creation(self, sample_chunk):
        """The fixture chunk carries the expected attribute values."""
        from models import ChunkType, FileType

        expected_attrs = [
            ("content", "def hello():\n print('Hello')"),
            ("chunk_type", ChunkType.CODE),
            ("file_type", FileType.PYTHON),
            ("source_path", "/test/hello.py"),
            ("start_line", 1),
            ("end_line", 2),
            ("token_count", 15),
        ]
        for attr, expected in expected_attrs:
            assert getattr(sample_chunk, attr) == expected

    def test_chunk_to_dict(self, sample_chunk):
        """``to_dict`` returns the expected value under each checked key."""
        payload = sample_chunk.to_dict()

        expected_items = [
            ("content", "def hello():\n print('Hello')"),
            ("chunk_type", "code"),
            ("file_type", "python"),
            ("source_path", "/test/hello.py"),
            ("start_line", 1),
            ("end_line", 2),
            ("token_count", 15),
        ]
        for key, expected in expected_items:
            assert payload[key] == expected
|
|
|
|
|
|
class TestKnowledgeEmbedding:
    """Checks the ``sample_embedding`` fixture's attributes and its ``to_dict`` output."""

    def test_embedding_creation(self, sample_embedding):
        """The fixture embedding carries the expected identifiers and vector length."""
        expected_attrs = [
            ("id", "test-id-123"),
            ("project_id", "proj-123"),
            ("collection", "default"),
        ]
        for attr, expected in expected_attrs:
            assert getattr(sample_embedding, attr) == expected

        vector = sample_embedding.embedding
        assert len(vector) == 1536

    def test_embedding_to_dict(self, sample_embedding):
        """``to_dict`` returns the expected values and has no ``embedding`` key."""
        payload = sample_embedding.to_dict()

        expected_items = [
            ("id", "test-id-123"),
            ("project_id", "proj-123"),
            ("collection", "default"),
            ("chunk_type", "code"),
            ("file_type", "python"),
        ]
        for key, expected in expected_items:
            assert payload[key] == expected

        # Embedding excluded for size
        assert "embedding" not in payload
|
|
|
|
|
|
class TestIngestRequest:
    """Checks IngestRequest field values, both explicit and defaulted."""

    def test_ingest_request_creation(self, sample_ingest_request):
        """The fixture request carries the expected field values."""
        from models import ChunkType, FileType

        expected_attrs = [
            ("project_id", "proj-123"),
            ("agent_id", "agent-456"),
            ("chunk_type", ChunkType.CODE),
            ("file_type", FileType.PYTHON),
            ("collection", "default"),
        ]
        for attr, expected in expected_attrs:
            assert getattr(sample_ingest_request, attr) == expected

    def test_ingest_request_defaults(self):
        """A request built from only the three supplied fields gets the expected defaults."""
        from models import ChunkType, IngestRequest

        supplied = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "content": "test content",
        }
        request = IngestRequest(**supplied)

        assert request.collection == "default"
        assert request.chunk_type == ChunkType.TEXT
        # Identity check kept on purpose: the default must be None itself.
        assert request.file_type is None
        assert request.metadata == {}
|
|
|
|
|
|
class TestIngestResult:
    """Checks IngestResult construction for a success case and a failure case."""

    def test_successful_result(self):
        """A success result keeps its counts and has ``error`` set to None."""
        from models import IngestResult

        fields = {
            "success": True,
            "chunks_created": 5,
            "embeddings_generated": 5,
            "source_path": "/test/file.py",
            "collection": "default",
            "chunk_ids": ["id1", "id2", "id3", "id4", "id5"],
        }
        outcome = IngestResult(**fields)

        assert outcome.success is True
        assert outcome.chunks_created == 5
        assert outcome.error is None

    def test_failed_result(self):
        """A failure result keeps ``success`` False and the supplied error text."""
        from models import IngestResult

        fields = {
            "success": False,
            "chunks_created": 0,
            "embeddings_generated": 0,
            "collection": "default",
            "chunk_ids": [],
            "error": "Something went wrong",
        }
        outcome = IngestResult(**fields)

        assert outcome.success is False
        assert outcome.error == "Something went wrong"
|
|
|
|
|
|
class TestSearchRequest:
    """Checks SearchRequest field values, both explicit and defaulted."""

    def test_search_request_creation(self, sample_search_request):
        """The fixture request carries the expected field values."""
        from models import SearchType

        expected_attrs = [
            ("project_id", "proj-123"),
            ("query", "hello function"),
            ("search_type", SearchType.HYBRID),
            ("limit", 10),
            ("threshold", 0.7),
        ]
        for attr, expected in expected_attrs:
            assert getattr(sample_search_request, attr) == expected

    def test_search_request_defaults(self):
        """A request built from only the three supplied fields gets the expected defaults."""
        from models import SearchRequest, SearchType

        supplied = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "query": "test query",
        }
        request = SearchRequest(**supplied)

        assert request.search_type == SearchType.HYBRID
        # Identity checks kept on purpose: these defaults must be None itself.
        assert request.collection is None
        assert request.limit == 10
        assert request.threshold == 0.7
        assert request.file_types is None
|
|
|
|
|
|
class TestSearchResult:
    """Checks the SearchResult produced by ``SearchResult.from_embedding``."""

    def test_from_embedding(self, sample_embedding):
        """Converting the fixture embedding with score 0.95 yields the expected fields."""
        from models import SearchResult

        hit = SearchResult.from_embedding(sample_embedding, 0.95)

        expected_attrs = [
            ("id", "test-id-123"),
            ("content", "def hello():\n print('Hello')"),
            ("score", 0.95),
            ("source_path", "/test/hello.py"),
            ("chunk_type", "code"),
            ("file_type", "python"),
        ]
        for attr, expected in expected_attrs:
            assert getattr(hit, attr) == expected
|
|
|
|
|
|
class TestSearchResponse:
    """Checks SearchResponse construction from a list of SearchResult items."""

    def test_search_response(self):
        """A response built from two results keeps its query, results and timing."""
        from models import SearchResponse, SearchResult

        # (id, content, score, chunk_type) for each result; collection is shared.
        rows = (
            ("id1", "test content 1", 0.95, "code"),
            ("id2", "test content 2", 0.85, "text"),
        )
        hits = [
            SearchResult(
                id=ident,
                content=text,
                score=score,
                chunk_type=kind,
                collection="default",
            )
            for ident, text, score, kind in rows
        ]

        response = SearchResponse(
            query="test query",
            search_type="hybrid",
            results=hits,
            total_results=2,
            search_time_ms=15.5,
        )

        assert response.query == "test query"
        assert len(response.results) == 2
        assert response.search_time_ms == 15.5
|
|
|
|
|
|
class TestDeleteRequest:
    """Checks DeleteRequest built with a source path, a collection, or chunk IDs."""

    def test_delete_by_source(self, sample_delete_request):
        """The fixture request has a source path; collection and chunk_ids are None."""
        target = sample_delete_request

        assert target.project_id == "proj-123"
        assert target.source_path == "/test/hello.py"
        assert target.collection is None
        assert target.chunk_ids is None

    def test_delete_by_collection(self):
        """A request given only a collection keeps it and has source_path None."""
        from models import DeleteRequest

        supplied = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "collection": "to-delete",
        }
        target = DeleteRequest(**supplied)

        assert target.collection == "to-delete"
        assert target.source_path is None

    def test_delete_by_ids(self):
        """A request given three chunk IDs reports three chunk IDs."""
        from models import DeleteRequest

        supplied = {
            "project_id": "proj-123",
            "agent_id": "agent-456",
            "chunk_ids": ["id1", "id2", "id3"],
        }
        target = DeleteRequest(**supplied)

        assert len(target.chunk_ids) == 3
|
|
|
|
|
|
class TestCollectionInfo:
    """Checks CollectionInfo construction."""

    def test_collection_info(self):
        """A CollectionInfo keeps the supplied name, chunk count and file types."""
        from models import CollectionInfo

        # Two separate clock reads, taken in the same order as the fields.
        created = datetime.now(UTC)
        updated = datetime.now(UTC)

        summary = CollectionInfo(
            name="test-collection",
            project_id="proj-123",
            chunk_count=100,
            total_tokens=50000,
            file_types=["python", "javascript"],
            created_at=created,
            updated_at=updated,
        )

        assert summary.name == "test-collection"
        assert summary.chunk_count == 100
        assert len(summary.file_types) == 2
|
|
|
|
|
|
class TestCollectionStats:
    """Checks CollectionStats construction."""

    def test_collection_stats(self):
        """A CollectionStats keeps the supplied counts and per-type breakdown."""
        from models import CollectionStats

        # Two separate clock reads, taken in the same order as the fields.
        oldest = datetime.now(UTC)
        newest = datetime.now(UTC)

        summary = CollectionStats(
            collection="test-collection",
            project_id="proj-123",
            chunk_count=100,
            unique_sources=10,
            total_tokens=50000,
            avg_chunk_size=500.0,
            chunk_types={"code": 60, "text": 40},
            file_types={"python": 50, "javascript": 10},
            oldest_chunk=oldest,
            newest_chunk=newest,
        )

        assert summary.chunk_count == 100
        assert summary.unique_sources == 10
        assert summary.chunk_types["code"] == 60
|