# syndarix/mcp-servers/knowledge-base/tests/conftest.py

"""
Test fixtures for Knowledge Base MCP Server.
"""

import os
import sys
from datetime import UTC, datetime
from unittest.mock import AsyncMock, MagicMock

import pytest

# Make the parent of this file's directory importable so the fixtures below
# can import the server modules by their bare names.
sys.path.insert(
    0,
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)

# Test-mode environment must be in place before any server module is imported.
os.environ.update(
    {
        "IS_TEST": "true",
        "KB_DATABASE_URL": "postgresql://test:test@localhost:5432/test",
        "KB_REDIS_URL": "redis://localhost:6379/0",
        "KB_LLM_GATEWAY_URL": "http://localhost:8001",
    }
)


@pytest.fixture
def settings():
    """Build a ``Settings`` object populated with fixed test values.

    ``reset_settings()`` is called first; the returned instance is then
    constructed directly from explicit host/port, service URLs, the
    embedding dimension and the per-content-type chunking parameters.
    """
    from config import Settings, reset_settings

    reset_settings()

    overrides = {
        # Server binding
        "host": "127.0.0.1",
        "port": 8002,
        "debug": True,
        # Backing services
        "database_url": "postgresql://test:test@localhost:5432/test",
        "redis_url": "redis://localhost:6379/0",
        "llm_gateway_url": "http://localhost:8001",
        # Embedding and chunking parameters
        "embedding_dimension": 1536,
        "code_chunk_size": 500,
        "code_chunk_overlap": 50,
        "markdown_chunk_size": 800,
        "markdown_chunk_overlap": 100,
        "text_chunk_size": 400,
        "text_chunk_overlap": 50,
    }
    return Settings(**overrides)


@pytest.fixture
def mock_database():
    """Provide a ``DatabaseManager`` test double with stubbed methods.

    The double is a ``MagicMock`` created with ``spec=DatabaseManager``.
    Its ``_pool`` is a plain ``MagicMock``, ``acquire`` is a synchronous
    mock whose call result is an ``AsyncMock``, and every other method
    configured here is an ``AsyncMock`` (some with a canned return value).
    """
    from database import DatabaseManager

    db_double = MagicMock(spec=DatabaseManager)
    db_double._pool = MagicMock()
    db_double.acquire = MagicMock(return_value=AsyncMock())

    # Awaitable methods that resolve to a fixed, canned value.
    canned_results = {
        "store_embedding": "test-id-123",
        "store_embeddings_batch": ["id-1", "id-2"],
        "semantic_search": [],
        "keyword_search": [],
        "delete_by_source": 1,
        "delete_collection": 5,
        "delete_by_ids": 2,
        "replace_source_embeddings": (1, ["new-id-1"]),
        "list_collections": [],
        "cleanup_expired": 0,
    }
    for method_name, result in canned_results.items():
        setattr(db_double, method_name, AsyncMock(return_value=result))

    # Awaitable methods left with AsyncMock's default return value.
    for method_name in ("initialize", "close", "get_collection_stats"):
        setattr(db_double, method_name, AsyncMock())

    return db_double


@pytest.fixture
def mock_embeddings():
    """Provide an ``EmbeddingGenerator`` test double yielding fake vectors.

    ``generate`` resolves to a single 1536-element vector of ``0.1``;
    ``generate_batch`` resolves to one such vector per input text and
    ignores any extra keyword arguments.
    """
    from embeddings import EmbeddingGenerator

    dimension = 1536

    def _batch_of_vectors(texts, **_kwargs) -> list[list[float]]:
        # One freshly built vector for each text in the batch.
        return [[0.1] * dimension for _ in texts]

    generator_double = MagicMock(spec=EmbeddingGenerator)
    generator_double.initialize = AsyncMock()
    generator_double.close = AsyncMock()
    generator_double.generate = AsyncMock(return_value=[0.1] * dimension)
    generator_double.generate_batch = AsyncMock(side_effect=_batch_of_vectors)

    return generator_double


@pytest.fixture
def mock_redis():
    """Return a new ``fakeredis.aioredis.FakeRedis`` to stand in for Redis."""
    from fakeredis.aioredis import FakeRedis

    return FakeRedis()


@pytest.fixture
def sample_python_code() -> str:
    """Python source text used as input for chunking tests.

    The snippet contains a module docstring, two imports, a ``Calculator``
    class with three methods, a plain function and an ``async`` function.
    """
    source = '''"""Sample module for testing."""

import os
from typing import Any


class Calculator:
    """A simple calculator class."""

    def __init__(self, initial: int = 0) -> None:
        """Initialize calculator."""
        self.value = initial

    def add(self, x: int) -> int:
        """Add a value."""
        self.value += x
        return self.value

    def subtract(self, x: int) -> int:
        """Subtract a value."""
        self.value -= x
        return self.value


def helper_function(data: dict[str, Any]) -> str:
    """A helper function."""
    return str(data)


async def async_function() -> None:
    """An async function."""
    pass
'''
    return source


@pytest.fixture
def sample_markdown() -> str:
    """Markdown document used as input for chunking tests.

    The document mixes level 1-3 headings, an ordered and an unordered
    list, fenced ``python`` and ``json`` code blocks, bold text and
    inline code.
    """
    document = '''# Project Documentation

This is the main documentation for our project.

## Getting Started

To get started, follow these steps:

1. Install dependencies
2. Configure settings
3. Run the application

### Prerequisites

You'll need the following installed:

- Python 3.12+
- PostgreSQL
- Redis

```python
# Example code
def main():
    print("Hello, World!")
```

## API Reference

### Search Endpoint

The search endpoint allows you to query the knowledge base.

**Endpoint:** `POST /api/search`

**Request:**
```json
{
  "query": "your search query",
  "limit": 10
}
```

## Contributing

We welcome contributions! Please see our contributing guide.
'''
    return document


@pytest.fixture
def sample_text() -> str:
    """Plain-text passage used as input for chunking tests.

    The passage is four multi-sentence paragraphs separated by blank
    lines and ends with a trailing newline.
    """
    passage = '''The quick brown fox jumps over the lazy dog. This is a sample text that we use for testing the text chunking functionality. It contains multiple sentences that should be properly split into chunks.

Each paragraph represents a logical unit of text. The chunker should try to respect paragraph boundaries when possible. This helps maintain context and readability.

When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. This ensures that each chunk contains complete thoughts and is useful for retrieval.

The final paragraph tests edge cases. What happens with short paragraphs? Do they get merged with adjacent content? Let's find out!
'''
    return passage


@pytest.fixture
def sample_chunk():
    """Build a ``Chunk`` describing a two-line Python function.

    The chunk is typed as code from a Python file, points at
    ``/test/hello.py`` lines 1-2, and carries a ``function`` metadata entry
    plus a token count.
    """
    from models import Chunk, ChunkType, FileType

    chunk_fields = {
        "content": "def hello():\n    print('Hello')",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "source_path": "/test/hello.py",
        "start_line": 1,
        "end_line": 2,
        "metadata": {"function": "hello"},
        "token_count": 15,
    }
    return Chunk(**chunk_fields)


@pytest.fixture
def sample_embedding():
    """Build a ``KnowledgeEmbedding`` for a two-line Python function.

    The record belongs to project ``proj-123`` in the ``default``
    collection, carries a 1536-element vector of ``0.1`` and points at
    ``/test/hello.py`` lines 1-2.

    ``created_at`` and ``updated_at`` share a single timezone-aware UTC
    timestamp, so the two fields compare equal.
    """
    from models import ChunkType, FileType, KnowledgeEmbedding

    # Read the clock once: two separate now() calls would differ by a few
    # microseconds and make created_at != updated_at nondeterministically.
    timestamp = datetime.now(UTC)

    return KnowledgeEmbedding(
        id="test-id-123",
        project_id="proj-123",
        collection="default",
        content="def hello():\n    print('Hello')",
        embedding=[0.1] * 1536,
        chunk_type=ChunkType.CODE,
        source_path="/test/hello.py",
        start_line=1,
        end_line=2,
        file_type=FileType.PYTHON,
        metadata={"function": "hello"},
        content_hash="abc123",
        created_at=timestamp,
        updated_at=timestamp,
    )


@pytest.fixture
def sample_ingest_request():
    """Build an ``IngestRequest`` for a two-line Python function.

    The request is issued by agent ``agent-456`` for project ``proj-123``
    and targets the ``default`` collection with code content from
    ``/test/hello.py``.
    """
    from models import ChunkType, FileType, IngestRequest

    request_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "content": "def hello():\n    print('Hello')",
        "source_path": "/test/hello.py",
        "collection": "default",
        "chunk_type": ChunkType.CODE,
        "file_type": FileType.PYTHON,
        "metadata": {"test": True},
    }
    return IngestRequest(**request_fields)


@pytest.fixture
def sample_search_request():
    """Build a hybrid ``SearchRequest`` against the ``default`` collection.

    The request is issued by agent ``agent-456`` for project ``proj-123``,
    queries ``"hello function"``, and sets a limit of 10 and a threshold
    of 0.7.
    """
    from models import SearchRequest, SearchType

    request_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "query": "hello function",
        "search_type": SearchType.HYBRID,
        "collection": "default",
        "limit": 10,
        "threshold": 0.7,
    }
    return SearchRequest(**request_fields)


@pytest.fixture
def sample_delete_request():
    """Build a ``DeleteRequest`` targeting ``/test/hello.py``.

    The request is issued by agent ``agent-456`` for project ``proj-123``.
    """
    from models import DeleteRequest

    request_fields = {
        "project_id": "proj-123",
        "agent_id": "agent-456",
        "source_path": "/test/hello.py",
    }
    return DeleteRequest(**request_fields)