forked from cardosofelipe/fast-next-template
Improved code formatting, line breaks, and indentation across chunking logic and multiple test modules to enhance code clarity and maintain consistent style. No functional changes made.
286 lines
7.3 KiB
Python
286 lines
7.3 KiB
Python
"""
Test fixtures for Knowledge Base MCP Server.
"""
|
|
|
|
import os
|
|
import sys
|
|
from datetime import UTC, datetime
|
|
from unittest.mock import AsyncMock, MagicMock
|
|
|
|
import pytest
|
|
|
|
# Put the directory one level above this file's directory at the front of
# sys.path so the fixtures below can import the server modules by bare name.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

# Test-mode flag and service URLs are set before any server module is
# imported; the fixtures below import those modules inside their bodies.
os.environ.update(
    {
        "IS_TEST": "true",
        "KB_DATABASE_URL": "postgresql://test:test@localhost:5432/test",
        "KB_REDIS_URL": "redis://localhost:6379/0",
        "KB_LLM_GATEWAY_URL": "http://localhost:8001",
    }
)
|
|
|
|
|
|
@pytest.fixture
def settings():
    """Return a ``Settings`` object populated with fixed test values.

    ``reset_settings()`` is called first, then a new ``Settings`` is built
    with local service URLs, a 1536-wide embedding dimension and explicit
    chunk size/overlap values for code, Markdown and plain text.
    """
    from config import Settings, reset_settings

    reset_settings()

    test_values = {
        # Server
        "host": "127.0.0.1",
        "port": 8002,
        "debug": True,
        # Backing services
        "database_url": "postgresql://test:test@localhost:5432/test",
        "redis_url": "redis://localhost:6379/0",
        "llm_gateway_url": "http://localhost:8001",
        # Embeddings and chunking
        "embedding_dimension": 1536,
        "code_chunk_size": 500,
        "code_chunk_overlap": 50,
        "markdown_chunk_size": 800,
        "markdown_chunk_overlap": 100,
        "text_chunk_size": 400,
        "text_chunk_overlap": 50,
    }
    return Settings(**test_values)
|
|
|
|
|
|
@pytest.fixture
def mock_database():
    """Return a ``MagicMock`` specced on ``DatabaseManager`` with async stubs.

    ``_pool`` is a plain ``MagicMock`` and ``acquire`` is a ``MagicMock``
    returning an ``AsyncMock``. Every other stubbed method is an
    ``AsyncMock``; most carry a canned return value, while ``initialize``,
    ``close`` and ``get_collection_stats`` use the ``AsyncMock`` default.
    """
    from database import DatabaseManager

    db = MagicMock(spec=DatabaseManager)
    db._pool = MagicMock()
    db.acquire = MagicMock(return_value=AsyncMock())

    # Async methods with no explicit return value configured.
    for method_name in ("initialize", "close", "get_collection_stats"):
        setattr(db, method_name, AsyncMock())

    # Async methods paired with the value they resolve to.
    canned_results = {
        "store_embedding": "test-id-123",
        "store_embeddings_batch": ["id-1", "id-2"],
        "semantic_search": [],
        "keyword_search": [],
        "delete_by_source": 1,
        "delete_collection": 5,
        "delete_by_ids": 2,
        "replace_source_embeddings": (1, ["new-id-1"]),
        "list_collections": [],
        "cleanup_expired": 0,
    }
    for method_name, result in canned_results.items():
        setattr(db, method_name, AsyncMock(return_value=result))

    return db
|
|
|
|
|
|
@pytest.fixture
def mock_embeddings():
    """Return a ``MagicMock`` specced on ``EmbeddingGenerator``.

    ``generate`` resolves to one fixed 1536-element vector of ``0.1``.
    ``generate_batch`` resolves to a list holding one freshly built vector
    of the same shape per entry in ``texts``; extra keyword arguments are
    accepted and ignored.
    """
    from embeddings import EmbeddingGenerator

    dimension = 1536

    def batch_vectors(texts, **_kwargs):
        # One new list per input text.
        return [[0.1] * dimension for _ in texts]

    generator = MagicMock(spec=EmbeddingGenerator)
    generator.initialize = AsyncMock()
    generator.close = AsyncMock()
    generator.generate = AsyncMock(return_value=[0.1] * dimension)
    generator.generate_batch = AsyncMock(side_effect=batch_vectors)

    return generator
|
|
|
|
|
|
@pytest.fixture
def mock_redis():
    """Return a new ``fakeredis.aioredis.FakeRedis`` instance."""
    from fakeredis.aioredis import FakeRedis

    fake_client = FakeRedis()
    return fake_client
|
|
|
|
|
|
@pytest.fixture
def sample_python_code():
    """Return Python source text used as input for chunking tests.

    The text holds a module docstring, two imports, a ``Calculator`` class
    with three methods, a plain function and an async function.
    """
    # NOTE(review): indentation inside this literal is assumed to be the
    # conventional 4 spaces; confirm against the canonical file.
    source = '''"""Sample module for testing."""

import os
from typing import Any


class Calculator:
    """A simple calculator class."""

    def __init__(self, initial: int = 0) -> None:
        """Initialize calculator."""
        self.value = initial

    def add(self, x: int) -> int:
        """Add a value."""
        self.value += x
        return self.value

    def subtract(self, x: int) -> int:
        """Subtract a value."""
        self.value -= x
        return self.value


def helper_function(data: dict[str, Any]) -> str:
    """A helper function."""
    return str(data)


async def async_function() -> None:
    """An async function."""
    pass
'''
    return source
|
|
|
|
|
|
@pytest.fixture
def sample_markdown():
    """Return a Markdown document used as input for chunking tests.

    The document has nested headings, an ordered list, a bullet list and
    two fenced code blocks (``python`` and ``json``).
    """
    # NOTE(review): indentation inside the fenced code blocks is assumed to
    # be 4 spaces; confirm against the canonical file.
    document = """# Project Documentation

This is the main documentation for our project.

## Getting Started

To get started, follow these steps:

1. Install dependencies
2. Configure settings
3. Run the application

### Prerequisites

You'll need the following installed:

- Python 3.12+
- PostgreSQL
- Redis

```python
# Example code
def main():
    print("Hello, World!")
```

## API Reference

### Search Endpoint

The search endpoint allows you to query the knowledge base.

**Endpoint:** `POST /api/search`

**Request:**
```json
{
    "query": "your search query",
    "limit": 10
}
```

## Contributing

We welcome contributions! Please see our contributing guide.
"""
    return document
|
|
|
|
|
|
@pytest.fixture
def sample_text():
    """Return four plain-text paragraphs used as input for chunking tests.

    Paragraphs are separated by one blank line and the text ends with a
    single trailing newline.
    """
    paragraphs = (
        "The quick brown fox jumps over the lazy dog. "
        "This is a sample text that we use for testing the text chunking functionality. "
        "It contains multiple sentences that should be properly split into chunks.",
        "Each paragraph represents a logical unit of text. "
        "The chunker should try to respect paragraph boundaries when possible. "
        "This helps maintain context and readability.",
        "When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. "
        "This ensures that each chunk contains complete thoughts and is useful for retrieval.",
        "The final paragraph tests edge cases. "
        "What happens with short paragraphs? "
        "Do they get merged with adjacent content? "
        "Let's find out!",
    )
    return "\n\n".join(paragraphs) + "\n"
|
|
|
|
|
|
@pytest.fixture
def sample_chunk():
    """Return a ``Chunk`` describing a two-line Python snippet.

    The chunk is typed as ``ChunkType.CODE`` / ``FileType.PYTHON``, points at
    ``/test/hello.py`` lines 1-2 and carries a ``function`` metadata entry.
    """
    from models import Chunk, ChunkType, FileType

    chunk_fields = {
        "content": "def hello():\n print('Hello')",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "source_path": "/test/hello.py",
        "start_line": 1,
        "end_line": 2,
        "metadata": {"function": "hello"},
        "token_count": 15,
    }
    return Chunk(**chunk_fields)
|
|
|
|
|
|
@pytest.fixture
def sample_embedding():
    """Return a ``KnowledgeEmbedding`` for a two-line Python snippet.

    The embedding vector is 1536 copies of ``0.1``. ``created_at`` and
    ``updated_at`` each come from their own ``datetime.now(UTC)`` call,
    taken in that order.
    """
    from models import ChunkType, FileType, KnowledgeEmbedding

    vector = [0.1] * 1536
    created = datetime.now(UTC)
    updated = datetime.now(UTC)

    record = KnowledgeEmbedding(
        id="test-id-123",
        project_id="proj-123",
        collection="default",
        content="def hello():\n print('Hello')",
        embedding=vector,
        chunk_type=ChunkType.CODE,
        source_path="/test/hello.py",
        start_line=1,
        end_line=2,
        file_type=FileType.PYTHON,
        metadata={"function": "hello"},
        content_hash="abc123",
        created_at=created,
        updated_at=updated,
    )
    return record
|
|
|
|
|
|
@pytest.fixture
def sample_ingest_request():
    """Return an ``IngestRequest`` for a two-line Python snippet.

    The request targets project ``proj-123`` / agent ``agent-456`` and the
    ``default`` collection, typed as ``ChunkType.CODE`` / ``FileType.PYTHON``.
    """
    from models import ChunkType, FileType, IngestRequest

    request_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "content": "def hello():\n print('Hello')",
        "source_path": "/test/hello.py",
        "collection": "default",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "metadata": {"test": True},
    }
    return IngestRequest(**request_fields)
|
|
|
|
|
|
@pytest.fixture
def sample_search_request():
    """Return a ``SearchRequest`` using ``SearchType.HYBRID``.

    The request queries ``"hello function"`` in the ``default`` collection
    for project ``proj-123`` / agent ``agent-456``, with ``limit=10`` and
    ``threshold=0.7``.
    """
    from models import SearchRequest, SearchType

    request_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "query": "hello function",
        "search_type": SearchType.HYBRID,
        "collection": "default",
        "limit": 10,
        "threshold": 0.7,
    }
    return SearchRequest(**request_fields)
|
|
|
|
|
|
@pytest.fixture
def sample_delete_request():
    """Return a ``DeleteRequest`` for ``/test/hello.py``.

    The request is issued for project ``proj-123`` by agent ``agent-456``.
    """
    from models import DeleteRequest

    delete_request = DeleteRequest(
        project_id="proj-123",
        agent_id="agent-456",
        source_path="/test/hello.py",
    )
    return delete_request
|