""" Test fixtures for Knowledge Base MCP Server. """ import os import sys from datetime import UTC, datetime from unittest.mock import AsyncMock, MagicMock import pytest # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Set test mode before importing modules os.environ["IS_TEST"] = "true" os.environ["KB_DATABASE_URL"] = "postgresql://test:test@localhost:5432/test" os.environ["KB_REDIS_URL"] = "redis://localhost:6379/0" os.environ["KB_LLM_GATEWAY_URL"] = "http://localhost:8001" @pytest.fixture def settings(): """Create test settings.""" from config import Settings, reset_settings reset_settings() return Settings( host="127.0.0.1", port=8002, debug=True, database_url="postgresql://test:test@localhost:5432/test", redis_url="redis://localhost:6379/0", llm_gateway_url="http://localhost:8001", embedding_dimension=1536, code_chunk_size=500, code_chunk_overlap=50, markdown_chunk_size=800, markdown_chunk_overlap=100, text_chunk_size=400, text_chunk_overlap=50, ) @pytest.fixture def mock_database(): """Create mock database manager.""" from database import DatabaseManager mock_db = MagicMock(spec=DatabaseManager) mock_db._pool = MagicMock() mock_db.acquire = MagicMock(return_value=AsyncMock()) # Mock database methods mock_db.initialize = AsyncMock() mock_db.close = AsyncMock() mock_db.store_embedding = AsyncMock(return_value="test-id-123") mock_db.store_embeddings_batch = AsyncMock(return_value=["id-1", "id-2"]) mock_db.semantic_search = AsyncMock(return_value=[]) mock_db.keyword_search = AsyncMock(return_value=[]) mock_db.delete_by_source = AsyncMock(return_value=1) mock_db.delete_collection = AsyncMock(return_value=5) mock_db.delete_by_ids = AsyncMock(return_value=2) mock_db.replace_source_embeddings = AsyncMock(return_value=(1, ["new-id-1"])) mock_db.list_collections = AsyncMock(return_value=[]) mock_db.get_collection_stats = AsyncMock() mock_db.cleanup_expired = AsyncMock(return_value=0) return mock_db @pytest.fixture def mock_embeddings(): """Create mock embedding generator.""" from embeddings import EmbeddingGenerator mock_emb = MagicMock(spec=EmbeddingGenerator) mock_emb.initialize = AsyncMock() mock_emb.close = AsyncMock() # Generate fake embeddings (1536 dimensions) def fake_embedding() -> list[float]: return [0.1] * 1536 mock_emb.generate = AsyncMock(return_value=fake_embedding()) mock_emb.generate_batch = AsyncMock( side_effect=lambda texts, **_kwargs: [fake_embedding() for _ in texts] ) return mock_emb @pytest.fixture def mock_redis(): """Create mock Redis client.""" import fakeredis.aioredis return fakeredis.aioredis.FakeRedis() @pytest.fixture def sample_python_code(): """Sample Python code for chunking tests.""" return '''"""Sample module for testing.""" import os from typing import Any class Calculator: """A simple calculator class.""" def __init__(self, initial: int = 0) -> None: """Initialize calculator.""" self.value = initial def add(self, x: int) -> int: """Add a value.""" self.value += x return self.value def subtract(self, x: int) -> int: """Subtract a value.""" self.value -= x return self.value def helper_function(data: dict[str, Any]) -> str: """A helper function.""" return str(data) async def async_function() -> None: """An async function.""" pass ''' @pytest.fixture def sample_markdown(): """Sample Markdown content for chunking tests.""" return """# Project Documentation This is the main documentation for our project. ## Getting Started To get started, follow these steps: 1. Install dependencies 2. Configure settings 3. Run the application ### Prerequisites You'll need the following installed: - Python 3.12+ - PostgreSQL - Redis ```python # Example code def main(): print("Hello, World!") ``` ## API Reference ### Search Endpoint The search endpoint allows you to query the knowledge base. **Endpoint:** `POST /api/search` **Request:** ```json { "query": "your search query", "limit": 10 } ``` ## Contributing We welcome contributions! Please see our contributing guide. """ @pytest.fixture def sample_text(): """Sample plain text for chunking tests.""" return """The quick brown fox jumps over the lazy dog. This is a sample text that we use for testing the text chunking functionality. It contains multiple sentences that should be properly split into chunks. Each paragraph represents a logical unit of text. The chunker should try to respect paragraph boundaries when possible. This helps maintain context and readability. When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. This ensures that each chunk contains complete thoughts and is useful for retrieval. The final paragraph tests edge cases. What happens with short paragraphs? Do they get merged with adjacent content? Let's find out! """ @pytest.fixture def sample_chunk(): """Sample chunk for testing.""" from models import Chunk, ChunkType, FileType return Chunk( content="def hello():\n print('Hello')", chunk_type=ChunkType.CODE, file_type=FileType.PYTHON, source_path="/test/hello.py", start_line=1, end_line=2, metadata={"function": "hello"}, token_count=15, ) @pytest.fixture def sample_embedding(): """Sample knowledge embedding for testing.""" from models import ChunkType, FileType, KnowledgeEmbedding return KnowledgeEmbedding( id="test-id-123", project_id="proj-123", collection="default", content="def hello():\n print('Hello')", embedding=[0.1] * 1536, chunk_type=ChunkType.CODE, source_path="/test/hello.py", start_line=1, end_line=2, file_type=FileType.PYTHON, metadata={"function": "hello"}, content_hash="abc123", created_at=datetime.now(UTC), updated_at=datetime.now(UTC), ) @pytest.fixture def sample_ingest_request(): """Sample ingest request for testing.""" from models import ChunkType, FileType, IngestRequest return IngestRequest( project_id="proj-123", agent_id="agent-456", content="def hello():\n print('Hello')", source_path="/test/hello.py", collection="default", chunk_type=ChunkType.CODE, file_type=FileType.PYTHON, metadata={"test": True}, ) @pytest.fixture def sample_search_request(): """Sample search request for testing.""" from models import SearchRequest, SearchType return SearchRequest( project_id="proj-123", agent_id="agent-456", query="hello function", search_type=SearchType.HYBRID, collection="default", limit=10, threshold=0.7, ) @pytest.fixture def sample_delete_request(): """Sample delete request for testing.""" from models import DeleteRequest return DeleteRequest( project_id="proj-123", agent_id="agent-456", source_path="/test/hello.py", )