Files
syndarix/mcp-servers/knowledge-base/tests/conftest.py
Felipe Cardoso cd7a9ccbdf fix(mcp-kb): add transactional batch insert and atomic document update
- Wrap store_embeddings_batch in transaction for all-or-nothing semantics
- Add replace_source_embeddings method for atomic document updates
- Update collection_manager to use transactional replace
- Prevents race conditions and data inconsistency (closes #77)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 01:07:40 +01:00

284 lines
7.2 KiB
Python

"""
Test fixtures for Knowledge Base MCP Server.
"""
import os
import sys
from datetime import UTC, datetime
from unittest.mock import AsyncMock, MagicMock
import pytest
# Put the parent of this file's directory at the front of sys.path so the
# fixtures below can import the server modules by their bare names.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

# Flag test mode and point every backing service at localhost. This has to
# happen before any of the application modules are imported.
os.environ.update(
    {
        "IS_TEST": "true",
        "KB_DATABASE_URL": "postgresql://test:test@localhost:5432/test",
        "KB_REDIS_URL": "redis://localhost:6379/0",
        "KB_LLM_GATEWAY_URL": "http://localhost:8001",
    }
)
@pytest.fixture
def settings():
    """Return a ``Settings`` instance populated with fixed test values.

    ``reset_settings()`` is invoked first, then a new ``Settings`` object is
    built from explicit server, backend-URL, embedding and chunking values.
    """
    from config import Settings, reset_settings

    reset_settings()

    overrides = {
        # Server
        "host": "127.0.0.1",
        "port": 8002,
        "debug": True,
        # Backing services (all local)
        "database_url": "postgresql://test:test@localhost:5432/test",
        "redis_url": "redis://localhost:6379/0",
        "llm_gateway_url": "http://localhost:8001",
        # Embeddings
        "embedding_dimension": 1536,
        # Chunk size / overlap per content kind
        "code_chunk_size": 500,
        "code_chunk_overlap": 50,
        "markdown_chunk_size": 800,
        "markdown_chunk_overlap": 100,
        "text_chunk_size": 400,
        "text_chunk_overlap": 50,
    }
    return Settings(**overrides)
@pytest.fixture
def mock_database():
    """Return a ``MagicMock`` spec'd to ``DatabaseManager`` with async stubs.

    Each database method is replaced by an ``AsyncMock``; most carry a canned
    return value, while ``initialize``, ``close`` and ``get_collection_stats``
    keep the ``AsyncMock`` default.
    """
    from database import DatabaseManager

    db_double = MagicMock(spec=DatabaseManager)
    db_double._pool = MagicMock()
    # acquire() is a plain callable that hands back an AsyncMock.
    db_double.acquire = MagicMock(return_value=AsyncMock())

    # Async methods with no preset return value.
    for method_name in ("initialize", "close", "get_collection_stats"):
        setattr(db_double, method_name, AsyncMock())

    # Async methods with a canned result.
    canned_results = {
        "store_embedding": "test-id-123",
        "store_embeddings_batch": ["id-1", "id-2"],
        "semantic_search": [],
        "keyword_search": [],
        "delete_by_source": 1,
        "delete_collection": 5,
        "delete_by_ids": 2,
        "replace_source_embeddings": (1, ["new-id-1"]),
        "list_collections": [],
        "cleanup_expired": 0,
    }
    for method_name, result in canned_results.items():
        setattr(db_double, method_name, AsyncMock(return_value=result))

    return db_double
@pytest.fixture
def mock_embeddings():
    """Return a ``MagicMock`` spec'd to ``EmbeddingGenerator``.

    ``generate`` resolves to a single 1536-element vector of ``0.1``;
    ``generate_batch`` resolves to one such vector per input text.
    """
    from embeddings import EmbeddingGenerator

    def _batch_vectors(texts, **_kwargs):
        # One fresh constant vector per text; extra keyword args are ignored.
        return [[0.1] * 1536 for _ in texts]

    generator_double = MagicMock(spec=EmbeddingGenerator)
    generator_double.initialize = AsyncMock()
    generator_double.close = AsyncMock()
    generator_double.generate = AsyncMock(return_value=[0.1] * 1536)
    generator_double.generate_batch = AsyncMock(side_effect=_batch_vectors)
    return generator_double
@pytest.fixture
def mock_redis():
    """Return a new ``fakeredis`` async client to stand in for Redis."""
    from fakeredis.aioredis import FakeRedis

    fake_client = FakeRedis()
    return fake_client
@pytest.fixture
def sample_python_code():
    """Return Python source text used as input for chunking tests.

    The sample contains a module docstring, two imports, a ``Calculator``
    class with ``__init__``/``add``/``subtract``, a ``helper_function`` and
    an ``async_function``.
    """
    # NOTE(review): the embedded sample has no indentation or blank lines in
    # this view - confirm the literal matches the intended fixture text.
    python_source = '''"""Sample module for testing."""
import os
from typing import Any
class Calculator:
"""A simple calculator class."""
def __init__(self, initial: int = 0) -> None:
"""Initialize calculator."""
self.value = initial
def add(self, x: int) -> int:
"""Add a value."""
self.value += x
return self.value
def subtract(self, x: int) -> int:
"""Subtract a value."""
self.value -= x
return self.value
def helper_function(data: dict[str, Any]) -> str:
"""A helper function."""
return str(data)
async def async_function() -> None:
"""An async function."""
pass
'''
    return python_source
@pytest.fixture
def sample_markdown():
    """Return a Markdown document used as input for chunking tests.

    The document mixes H1/H2/H3 headings, an ordered list, a bullet list,
    bold/inline-code text and two fenced code blocks (python and json).
    """
    markdown_source = '''# Project Documentation
This is the main documentation for our project.
## Getting Started
To get started, follow these steps:
1. Install dependencies
2. Configure settings
3. Run the application
### Prerequisites
You'll need the following installed:
- Python 3.12+
- PostgreSQL
- Redis
```python
# Example code
def main():
print("Hello, World!")
```
## API Reference
### Search Endpoint
The search endpoint allows you to query the knowledge base.
**Endpoint:** `POST /api/search`
**Request:**
```json
{
"query": "your search query",
"limit": 10
}
```
## Contributing
We welcome contributions! Please see our contributing guide.
'''
    return markdown_source
@pytest.fixture
def sample_text():
    """Return multi-sentence plain text used as input for chunking tests."""
    plain_text = '''The quick brown fox jumps over the lazy dog. This is a sample text that we use for testing the text chunking functionality. It contains multiple sentences that should be properly split into chunks.
Each paragraph represents a logical unit of text. The chunker should try to respect paragraph boundaries when possible. This helps maintain context and readability.
When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. This ensures that each chunk contains complete thoughts and is useful for retrieval.
The final paragraph tests edge cases. What happens with short paragraphs? Do they get merged with adjacent content? Let's find out!
'''
    return plain_text
@pytest.fixture
def sample_chunk():
    """Return a ``Chunk`` describing a two-line Python function."""
    from models import Chunk, ChunkType, FileType

    code_chunk = Chunk(
        content="def hello():\n print('Hello')",
        chunk_type=ChunkType.CODE,
        file_type=FileType.PYTHON,
        source_path="/test/hello.py",
        start_line=1,
        end_line=2,
        metadata={"function": "hello"},
        token_count=15,
    )
    return code_chunk
@pytest.fixture
def sample_embedding():
    """Return a ``KnowledgeEmbedding`` record for a small Python snippet.

    The vector is 1536 copies of ``0.1``; both timestamps are taken from
    separate ``datetime.now(UTC)`` calls at fixture-evaluation time.
    """
    from models import ChunkType, FileType, KnowledgeEmbedding

    record_fields = {
        "id": "test-id-123",
        "project_id": "proj-123",
        "collection": "default",
        "content": "def hello():\n print('Hello')",
        "embedding": [0.1] * 1536,
        "chunk_type": ChunkType.CODE,
        "source_path": "/test/hello.py",
        "start_line": 1,
        "end_line": 2,
        "file_type": FileType.PYTHON,
        "metadata": {"function": "hello"},
        "content_hash": "abc123",
        "created_at": datetime.now(UTC),
        "updated_at": datetime.now(UTC),
    }
    return KnowledgeEmbedding(**record_fields)
@pytest.fixture
def sample_ingest_request():
    """Return an ``IngestRequest`` carrying a small Python snippet."""
    from models import ChunkType, FileType, IngestRequest

    ingest_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "content": "def hello():\n print('Hello')",
        "source_path": "/test/hello.py",
        "collection": "default",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "metadata": {"test": True},
    }
    return IngestRequest(**ingest_fields)
@pytest.fixture
def sample_search_request():
    """Return a hybrid ``SearchRequest`` against the default collection."""
    from models import SearchRequest, SearchType

    search_request = SearchRequest(
        project_id="proj-123",
        agent_id="agent-456",
        query="hello function",
        search_type=SearchType.HYBRID,
        collection="default",
        limit=10,
        threshold=0.7,
    )
    return search_request
@pytest.fixture
def sample_delete_request():
    """Return a ``DeleteRequest`` that targets a single source path."""
    from models import DeleteRequest

    delete_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "source_path": "/test/hello.py",
    }
    return DeleteRequest(**delete_fields)