feat: enhance database transactions, add Makefiles, and improve Docker setup

- Refactored database batch operations to acquire the connection and open the transaction in a single `async with` statement, preserving the existing all-or-nothing semantics while removing one level of nesting.
- Added `Makefile` for `knowledge-base` and `llm-gateway` modules to streamline development workflows.
- Simplified `Dockerfile` for `llm-gateway` by removing multi-stage builds and optimizing dependencies.
- Improved code readability in `collection_manager` and `failover` modules with refined logic.
- Minor fixes in `test_server` and Redis health check handling for better diagnostics.
2026-01-05 00:49:19 +01:00
parent db12937495
commit 4154dd5268
8 changed files with 259 additions and 119 deletions
--- a/mcp-servers/knowledge-base/Makefile
+++ b/mcp-servers/knowledge-base/Makefile
@@ -0,0 +1,79 @@
+.PHONY: help install install-dev lint lint-fix format type-check test test-cov validate clean run
+
+# Default target
+help:
+	@echo "Knowledge Base MCP Server - Development Commands"
+	@echo ""
+	@echo "Setup:"
+	@echo "  make install       - Install production dependencies"
+	@echo "  make install-dev   - Install development dependencies"
+	@echo ""
+	@echo "Quality Checks:"
+	@echo "  make lint          - Run Ruff linter"
+	@echo "  make lint-fix      - Run Ruff linter with auto-fix"
+	@echo "  make format        - Format code with Ruff"
+	@echo "  make type-check    - Run mypy type checker"
+	@echo ""
+	@echo "Testing:"
+	@echo "  make test          - Run pytest"
+	@echo "  make test-cov      - Run pytest with coverage"
+	@echo ""
+	@echo "All-in-one:"
+	@echo "  make validate      - Run lint, type-check, and tests"
+	@echo ""
+	@echo "Running:"
+	@echo "  make run           - Run the server locally"
+	@echo ""
+	@echo "Cleanup:"
+	@echo "  make clean         - Remove cache and build artifacts"
+
+# Setup
+install:
+	@echo "Installing production dependencies..."
+	@uv pip install -e .
+
+install-dev:
+	@echo "Installing development dependencies..."
+	@uv pip install -e ".[dev]"
+
+# Quality checks
+lint:
+	@echo "Running Ruff linter..."
+	@uv run ruff check .
+
+lint-fix:
+	@echo "Running Ruff linter with auto-fix..."
+	@uv run ruff check --fix .
+
+format:
+	@echo "Formatting code..."
+	@uv run ruff format .
+
+type-check:
+	@echo "Running mypy..."
+	@uv run mypy . --ignore-missing-imports
+
+# Testing
+test:
+	@echo "Running tests..."
+	@uv run pytest tests/ -v
+
+test-cov:
+	@echo "Running tests with coverage..."
+	@uv run pytest tests/ -v --cov=. --cov-report=term-missing --cov-report=html
+
+# All-in-one validation
+validate: lint type-check test
+	@echo "All validations passed!"
+
+# Running
+run:
+	@echo "Starting Knowledge Base server..."
+	@uv run python server.py
+
+# Cleanup
+clean:
+	@echo "Cleaning up..."
+	@rm -rf __pycache__ .pytest_cache .mypy_cache .ruff_cache .coverage htmlcov
+	@find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
+	@find . -type f -name "*.pyc" -delete 2>/dev/null || true
--- a/mcp-servers/knowledge-base/collection_manager.py
+++ b/mcp-servers/knowledge-base/collection_manager.py
@@ -328,7 +328,7 @@ class CollectionManager:
                "source_path": chunk.source_path or source_path,
                "start_line": chunk.start_line,
                "end_line": chunk.end_line,
-                "file_type": (chunk.file_type or file_type).value if (chunk.file_type or file_type) else None,
+                "file_type": effective_file_type.value if (effective_file_type := chunk.file_type or file_type) else None,
            }
            embeddings_data.append((
                chunk.content,
--- a/mcp-servers/knowledge-base/database.py
+++ b/mcp-servers/knowledge-base/database.py
@@ -284,41 +284,40 @@ class DatabaseManager:
            )

        try:
-            async with self.acquire() as conn:
+            async with self.acquire() as conn, conn.transaction():
                # Wrap in transaction for all-or-nothing batch semantics
-                async with conn.transaction():
-                    for project_id, collection, content, embedding, chunk_type, metadata in embeddings:
-                        content_hash = self.compute_content_hash(content)
-                        source_path = metadata.get("source_path")
-                        start_line = metadata.get("start_line")
-                        end_line = metadata.get("end_line")
-                        file_type = metadata.get("file_type")
+                for project_id, collection, content, embedding, chunk_type, metadata in embeddings:
+                    content_hash = self.compute_content_hash(content)
+                    source_path = metadata.get("source_path")
+                    start_line = metadata.get("start_line")
+                    end_line = metadata.get("end_line")
+                    file_type = metadata.get("file_type")

-                        embedding_id = await conn.fetchval(
-                            """
-                            INSERT INTO knowledge_embeddings
-                            (project_id, collection, content, embedding, chunk_type,
-                             source_path, start_line, end_line, file_type, metadata,
-                             content_hash, expires_at)
-                            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
-                            ON CONFLICT DO NOTHING
-                            RETURNING id
-                            """,
-                            project_id,
-                            collection,
-                            content,
-                            embedding,
-                            chunk_type.value,
-                            source_path,
-                            start_line,
-                            end_line,
-                            file_type,
-                            metadata,
-                            content_hash,
-                            expires_at,
-                        )
-                        if embedding_id:
-                            ids.append(str(embedding_id))
+                    embedding_id = await conn.fetchval(
+                        """
+                        INSERT INTO knowledge_embeddings
+                        (project_id, collection, content, embedding, chunk_type,
+                         source_path, start_line, end_line, file_type, metadata,
+                         content_hash, expires_at)
+                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
+                        ON CONFLICT DO NOTHING
+                        RETURNING id
+                        """,
+                        project_id,
+                        collection,
+                        content,
+                        embedding,
+                        chunk_type.value,
+                        source_path,
+                        start_line,
+                        end_line,
+                        file_type,
+                        metadata,
+                        content_hash,
+                        expires_at,
+                    )
+                    if embedding_id:
+                        ids.append(str(embedding_id))

            logger.info(f"Stored {len(ids)} embeddings in batch")
            return ids
@@ -566,59 +565,58 @@ class DatabaseManager:
            )

        try:
-            async with self.acquire() as conn:
+            async with self.acquire() as conn, conn.transaction():
                # Use transaction for atomic replace
-                async with conn.transaction():
-                    # First, delete existing embeddings for this source
-                    delete_result = await conn.execute(
+                # First, delete existing embeddings for this source
+                delete_result = await conn.execute(
+                    """
+                    DELETE FROM knowledge_embeddings
+                    WHERE project_id = $1 AND source_path = $2 AND collection = $3
+                    """,
+                    project_id,
+                    source_path,
+                    collection,
+                )
+                deleted_count = int(delete_result.split()[-1])
+
+                # Then insert new embeddings
+                new_ids = []
+                for content, embedding, chunk_type, metadata in embeddings:
+                    content_hash = self.compute_content_hash(content)
+                    start_line = metadata.get("start_line")
+                    end_line = metadata.get("end_line")
+                    file_type = metadata.get("file_type")
+
+                    embedding_id = await conn.fetchval(
                        """
-                        DELETE FROM knowledge_embeddings
-                        WHERE project_id = $1 AND source_path = $2 AND collection = $3
+                        INSERT INTO knowledge_embeddings
+                        (project_id, collection, content, embedding, chunk_type,
+                         source_path, start_line, end_line, file_type, metadata,
+                         content_hash, expires_at)
+                        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
+                        RETURNING id
                        """,
                        project_id,
-                        source_path,
                        collection,
+                        content,
+                        embedding,
+                        chunk_type.value,
+                        source_path,
+                        start_line,
+                        end_line,
+                        file_type,
+                        metadata,
+                        content_hash,
+                        expires_at,
                    )
-                    deleted_count = int(delete_result.split()[-1])
+                    if embedding_id:
+                        new_ids.append(str(embedding_id))

-                    # Then insert new embeddings
-                    new_ids = []
-                    for content, embedding, chunk_type, metadata in embeddings:
-                        content_hash = self.compute_content_hash(content)
-                        start_line = metadata.get("start_line")
-                        end_line = metadata.get("end_line")
-                        file_type = metadata.get("file_type")
-
-                        embedding_id = await conn.fetchval(
-                            """
-                            INSERT INTO knowledge_embeddings
-                            (project_id, collection, content, embedding, chunk_type,
-                             source_path, start_line, end_line, file_type, metadata,
-                             content_hash, expires_at)
-                            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
-                            RETURNING id
-                            """,
-                            project_id,
-                            collection,
-                            content,
-                            embedding,
-                            chunk_type.value,
-                            source_path,
-                            start_line,
-                            end_line,
-                            file_type,
-                            metadata,
-                            content_hash,
-                            expires_at,
-                        )
-                        if embedding_id:
-                            new_ids.append(str(embedding_id))
-
-                    logger.info(
-                        f"Replaced source {source_path}: deleted {deleted_count}, "
-                        f"inserted {len(new_ids)} embeddings"
-                    )
-                    return deleted_count, new_ids
+                logger.info(
+                    f"Replaced source {source_path}: deleted {deleted_count}, "
+                    f"inserted {len(new_ids)} embeddings"
+                )
+                return deleted_count, new_ids

        except asyncpg.PostgresError as e:
            logger.error(f"Replace source error: {e}")
--- a/mcp-servers/knowledge-base/server.py
+++ b/mcp-servers/knowledge-base/server.py
@@ -193,7 +193,7 @@ async def health_check() -> dict[str, Any]:
    # Check Redis cache (non-critical - degraded without it)
    try:
        if _embeddings and _embeddings._redis:
-            await _embeddings._redis.ping()
+            await _embeddings._redis.ping()  # type: ignore[misc]
            status["dependencies"]["redis"] = "connected"
        else:
            status["dependencies"]["redis"] = "not initialized"
--- a/mcp-servers/knowledge-base/tests/test_server.py
+++ b/mcp-servers/knowledge-base/tests/test_server.py
@@ -1,8 +1,7 @@
 """Tests for server module and MCP tools."""

-import json
 from datetime import UTC, datetime
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock

 import pytest
 from fastapi.testclient import TestClient