forked from cardosofelipe/fast-next-template
- Wrap store_embeddings_batch in transaction for all-or-nothing semantics - Add replace_source_embeddings method for atomic document updates - Update collection_manager to use transactional replace - Prevents race conditions and data inconsistency (closes #77) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
284 lines
7.2 KiB
Python
284 lines
7.2 KiB
Python
"""
|
|
Test fixtures for Knowledge Base MCP Server.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
from datetime import UTC, datetime
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
# Put the parent of this file's directory at the front of the import path so
# the modules imported inside the fixtures below can be resolved.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

# Set test mode and service URLs before any project module is imported.
os.environ.update(
    {
        "IS_TEST": "true",
        "KB_DATABASE_URL": "postgresql://test:test@localhost:5432/test",
        "KB_REDIS_URL": "redis://localhost:6379/0",
        "KB_LLM_GATEWAY_URL": "http://localhost:8001",
    }
)
|
|
|
|
|
|
@pytest.fixture
def settings():
    """Return a ``Settings`` object populated with fixed test values.

    ``reset_settings()`` is invoked first (presumably to drop any cached
    settings -- confirm in ``config``), then a new ``Settings`` instance is
    built from explicit keyword arguments.
    """
    from config import Settings, reset_settings

    reset_settings()

    server_options = {
        "host": "127.0.0.1",
        "port": 8002,
        "debug": True,
    }
    backend_options = {
        "database_url": "postgresql://test:test@localhost:5432/test",
        "redis_url": "redis://localhost:6379/0",
        "llm_gateway_url": "http://localhost:8001",
        "embedding_dimension": 1536,
    }
    chunking_options = {
        "code_chunk_size": 500,
        "code_chunk_overlap": 50,
        "markdown_chunk_size": 800,
        "markdown_chunk_overlap": 100,
        "text_chunk_size": 400,
        "text_chunk_overlap": 50,
    }
    return Settings(**server_options, **backend_options, **chunking_options)
|
|
|
|
|
|
@pytest.fixture
def mock_database():
    """Return a ``MagicMock`` specced on ``DatabaseManager``.

    ``_pool`` is a plain ``MagicMock`` and ``acquire`` is a synchronous mock
    that hands back an ``AsyncMock``. Every other method listed below is an
    ``AsyncMock`` with a canned return value (or the mock default).
    """
    from database import DatabaseManager

    db = MagicMock(spec=DatabaseManager)
    db._pool = MagicMock()
    db.acquire = MagicMock(return_value=AsyncMock())

    # Coroutine methods whose awaited result is left at the mock default.
    for method_name in ("initialize", "close", "get_collection_stats"):
        setattr(db, method_name, AsyncMock())

    # Coroutine methods with a fixed awaited result.
    canned_results = {
        "store_embedding": "test-id-123",
        "store_embeddings_batch": ["id-1", "id-2"],
        "semantic_search": [],
        "keyword_search": [],
        "delete_by_source": 1,
        "delete_collection": 5,
        "delete_by_ids": 2,
        "replace_source_embeddings": (1, ["new-id-1"]),
        "list_collections": [],
        "cleanup_expired": 0,
    }
    for method_name, result in canned_results.items():
        setattr(db, method_name, AsyncMock(return_value=result))

    return db
|
|
|
|
|
|
@pytest.fixture
def mock_embeddings():
    """Return a ``MagicMock`` specced on ``EmbeddingGenerator``.

    ``generate`` resolves to one 1536-element vector of ``0.1``;
    ``generate_batch`` resolves to one such vector per input text and ignores
    any extra keyword arguments.
    """
    from embeddings import EmbeddingGenerator

    dimension = 1536

    def _vector() -> list[float]:
        # Fake embedding: constant value repeated ``dimension`` times.
        return [0.1 for _ in range(dimension)]

    def _batch(texts, **_kwargs):
        # One fresh fake vector per requested text.
        return [_vector() for _ in texts]

    generator = MagicMock(spec=EmbeddingGenerator)
    generator.initialize = AsyncMock()
    generator.close = AsyncMock()
    generator.generate = AsyncMock(return_value=_vector())
    generator.generate_batch = AsyncMock(side_effect=_batch)

    return generator
|
|
|
|
|
|
@pytest.fixture
def mock_redis():
    """Return a ``fakeredis`` async ``FakeRedis`` instance to stand in for Redis."""
    from fakeredis.aioredis import FakeRedis

    fake_client = FakeRedis()
    return fake_client
|
|
|
|
|
|
@pytest.fixture
def sample_python_code():
    """Return Python source text used as input for chunking tests.

    The text contains a module docstring, two imports, a class with three
    methods, a plain function and an async function.
    """
    source = '''"""Sample module for testing."""

import os
from typing import Any


class Calculator:
    """A simple calculator class."""

    def __init__(self, initial: int = 0) -> None:
        """Initialize calculator."""
        self.value = initial

    def add(self, x: int) -> int:
        """Add a value."""
        self.value += x
        return self.value

    def subtract(self, x: int) -> int:
        """Subtract a value."""
        self.value -= x
        return self.value


def helper_function(data: dict[str, Any]) -> str:
    """A helper function."""
    return str(data)


async def async_function() -> None:
    """An async function."""
    pass
'''
    return source
|
|
|
|
|
|
@pytest.fixture
def sample_markdown():
    """Return a Markdown document used as input for chunking tests.

    The document has nested headings, an ordered list, a bullet list and two
    fenced code blocks (python and json).
    """
    document = '''# Project Documentation

This is the main documentation for our project.

## Getting Started

To get started, follow these steps:

1. Install dependencies
2. Configure settings
3. Run the application

### Prerequisites

You'll need the following installed:

- Python 3.12+
- PostgreSQL
- Redis

```python
# Example code
def main():
    print("Hello, World!")
```

## API Reference

### Search Endpoint

The search endpoint allows you to query the knowledge base.

**Endpoint:** `POST /api/search`

**Request:**
```json
{
    "query": "your search query",
    "limit": 10
}
```

## Contributing

We welcome contributions! Please see our contributing guide.
'''
    return document
|
|
|
|
|
|
@pytest.fixture
def sample_text():
    """Return four blank-line-separated paragraphs of prose for chunking tests."""
    paragraphs = '''The quick brown fox jumps over the lazy dog. This is a sample text that we use for testing the text chunking functionality. It contains multiple sentences that should be properly split into chunks.

Each paragraph represents a logical unit of text. The chunker should try to respect paragraph boundaries when possible. This helps maintain context and readability.

When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. This ensures that each chunk contains complete thoughts and is useful for retrieval.

The final paragraph tests edge cases. What happens with short paragraphs? Do they get merged with adjacent content? Let's find out!
'''
    return paragraphs
|
|
|
|
|
|
@pytest.fixture
def sample_chunk():
    """Return a ``Chunk`` describing a two-line Python function at /test/hello.py."""
    from models import Chunk, ChunkType, FileType

    chunk_fields = {
        "content": "def hello():\n print('Hello')",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "source_path": "/test/hello.py",
        "start_line": 1,
        "end_line": 2,
        "metadata": {"function": "hello"},
        "token_count": 15,
    }
    return Chunk(**chunk_fields)
|
|
|
|
|
|
@pytest.fixture
def sample_embedding():
    """Return a ``KnowledgeEmbedding`` for a two-line Python function.

    The record uses a constant 1536-element vector of ``0.1``. ``created_at``
    and ``updated_at`` share one timezone-aware UTC timestamp.
    """
    from models import ChunkType, FileType, KnowledgeEmbedding

    # Read the clock once: two separate now() calls would leave the two
    # timestamps a few microseconds apart, differing from run to run.
    now = datetime.now(UTC)

    return KnowledgeEmbedding(
        id="test-id-123",
        project_id="proj-123",
        collection="default",
        content="def hello():\n print('Hello')",
        embedding=[0.1] * 1536,
        chunk_type=ChunkType.CODE,
        source_path="/test/hello.py",
        start_line=1,
        end_line=2,
        file_type=FileType.PYTHON,
        metadata={"function": "hello"},
        content_hash="abc123",
        created_at=now,
        updated_at=now,
    )
|
|
|
|
|
|
@pytest.fixture
def sample_ingest_request():
    """Return an ``IngestRequest`` for Python code content in the default collection."""
    from models import ChunkType, FileType, IngestRequest

    identity = {"project_id": "proj-123", "agent_id": "agent-456"}
    payload = {
        "content": "def hello():\n print('Hello')",
        "source_path": "/test/hello.py",
        "collection": "default",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "metadata": {"test": True},
    }
    return IngestRequest(**identity, **payload)
|
|
|
|
|
|
@pytest.fixture
def sample_search_request():
    """Return a hybrid ``SearchRequest`` for "hello function" (limit 10, threshold 0.7)."""
    from models import SearchRequest, SearchType

    search_request = SearchRequest(
        project_id="proj-123",
        agent_id="agent-456",
        query="hello function",
        search_type=SearchType.HYBRID,
        collection="default",
        limit=10,
        threshold=0.7,
    )
    return search_request
|
|
|
|
|
|
@pytest.fixture
def sample_delete_request():
    """Return a ``DeleteRequest`` targeting the source path /test/hello.py."""
    from models import DeleteRequest

    delete_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "source_path": "/test/hello.py",
    }
    return DeleteRequest(**delete_fields)
|