forked from cardosofelipe/fast-next-template
refactor(knowledge-base mcp server): adjust formatting for consistency and readability
Improved code formatting, line breaks, and indentation across chunking logic and multiple test modules to enhance code clarity and maintain consistent style. No functional changes made.
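The pattern is the same throughout the diff: statements that overflowed the line-length limit are wrapped with the arguments on their own indented lines and a dedented closing parenthesis, and the fixtures' triple-single-quoted strings are normalized to triple double quotes (the delimiters change, the string values do not). A representative before/after, taken from the first hunk below; the exact formatter and line-length limit are assumptions, the commit does not state its tooling:

```python
from unittest.mock import AsyncMock, MagicMock

mock_emb = MagicMock()


def fake_embedding() -> list[float]:
    return [0.1] * 1536


# Before: a single call that overflows the (assumed) line-length limit
mock_emb.generate_batch = AsyncMock(side_effect=lambda texts, **_kwargs: [fake_embedding() for _ in texts])

# After: the same call wrapped as in the first hunk below; behavior is unchanged
mock_emb.generate_batch = AsyncMock(
    side_effect=lambda texts, **_kwargs: [fake_embedding() for _ in texts]
)
```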
@@ -83,7 +83,9 @@ def mock_embeddings():
         return [0.1] * 1536

     mock_emb.generate = AsyncMock(return_value=fake_embedding())
-    mock_emb.generate_batch = AsyncMock(side_effect=lambda texts, **_kwargs: [fake_embedding() for _ in texts])
+    mock_emb.generate_batch = AsyncMock(
+        side_effect=lambda texts, **_kwargs: [fake_embedding() for _ in texts]
+    )

     return mock_emb
@@ -137,7 +139,7 @@ async def async_function() -> None:
 @pytest.fixture
 def sample_markdown():
     """Sample Markdown content for chunking tests."""
-    return '''# Project Documentation
+    return """# Project Documentation

 This is the main documentation for our project.
@@ -182,20 +184,20 @@ The search endpoint allows you to query the knowledge base.
 ## Contributing

 We welcome contributions! Please see our contributing guide.
-'''
+"""


 @pytest.fixture
 def sample_text():
     """Sample plain text for chunking tests."""
-    return '''The quick brown fox jumps over the lazy dog. This is a sample text that we use for testing the text chunking functionality. It contains multiple sentences that should be properly split into chunks.
+    return """The quick brown fox jumps over the lazy dog. This is a sample text that we use for testing the text chunking functionality. It contains multiple sentences that should be properly split into chunks.

 Each paragraph represents a logical unit of text. The chunker should try to respect paragraph boundaries when possible. This helps maintain context and readability.

 When chunks need to be split mid-paragraph, the chunker should prefer sentence boundaries. This ensures that each chunk contains complete thoughts and is useful for retrieval.

 The final paragraph tests edge cases. What happens with short paragraphs? Do they get merged with adjacent content? Let's find out!
-'''
+"""


 @pytest.fixture
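The sample_text fixture above spells out the intended chunking behavior: respect paragraph boundaries, and fall back to sentence boundaries when a paragraph has to be split. Purely as an illustration of that idea, here is a minimal paragraph-then-sentence splitter; it is not the repository's TextChunker, and the `chunk_size` semantics (characters, no overlap) are assumptions:

```python
import re


def naive_chunk(text: str, chunk_size: int = 100) -> list[str]:
    """Split text into chunks, preferring paragraph then sentence boundaries."""
    chunks: list[str] = []
    for paragraph in re.split(r"\n\s*\n", text.strip()):
        if len(paragraph) <= chunk_size:
            chunks.append(paragraph)
            continue
        # Paragraph too long: accumulate whole sentences up to chunk_size.
        current = ""
        for sentence in re.split(r"(?<=[.!?])\s+", paragraph):
            if current and len(current) + len(sentence) + 1 > chunk_size:
                chunks.append(current)
                current = sentence
            else:
                current = f"{current} {sentence}".strip()
        if current:
            chunks.append(current)
    return chunks
```

This mirrors what the TestTextChunker cases below assert at a high level, without claiming anything about the real implementation's overlap handling or token accounting.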
@@ -1,7 +1,6 @@
 """Tests for chunking module."""


-
 class TestBaseChunker:
     """Tests for base chunker functionality."""
@@ -149,7 +148,7 @@ class TestMarkdownChunker:
         """Test that chunker respects heading hierarchy."""
         from chunking.markdown import MarkdownChunker

-        markdown = '''# Main Title
+        markdown = """# Main Title

 Introduction paragraph.
@@ -164,7 +163,7 @@ More detailed content.
 ## Section Two

 Content for section two.
-'''
+"""

         chunker = MarkdownChunker(
             chunk_size=200,
@@ -188,7 +187,7 @@ Content for section two.
         """Test handling of code blocks in markdown."""
         from chunking.markdown import MarkdownChunker

-        markdown = '''# Code Example
+        markdown = """# Code Example

 Here's some code:
@@ -198,7 +197,7 @@ def hello():
 ```

 End of example.
-'''
+"""

         chunker = MarkdownChunker(
             chunk_size=500,
@@ -256,12 +255,12 @@ class TestTextChunker:
         """Test that chunker respects paragraph boundaries."""
         from chunking.text import TextChunker

-        text = '''First paragraph with some content.
+        text = """First paragraph with some content.

 Second paragraph with different content.

 Third paragraph to test chunking behavior.
-'''
+"""

         chunker = TextChunker(
             chunk_size=100,
@@ -67,10 +67,14 @@ class TestCollectionManager:
         assert result.embeddings_generated == 0

     @pytest.mark.asyncio
-    async def test_ingest_error_handling(self, collection_manager, sample_ingest_request):
+    async def test_ingest_error_handling(
+        self, collection_manager, sample_ingest_request
+    ):
         """Test ingest error handling."""
         # Make embedding generation fail
-        collection_manager._embeddings.generate_batch.side_effect = Exception("Embedding error")
+        collection_manager._embeddings.generate_batch.side_effect = Exception(
+            "Embedding error"
+        )

         result = await collection_manager.ingest(sample_ingest_request)
@@ -182,7 +186,9 @@ class TestCollectionManager:
         )
         collection_manager._database.get_collection_stats.return_value = expected_stats

-        stats = await collection_manager.get_collection_stats("proj-123", "test-collection")
+        stats = await collection_manager.get_collection_stats(
+            "proj-123", "test-collection"
+        )

         assert stats.chunk_count == 100
         assert stats.unique_sources == 10
@@ -17,19 +17,15 @@ class TestEmbeddingGenerator:
         response.raise_for_status = MagicMock()
         response.json.return_value = {
             "result": {
-                "content": [
-                    {
-                        "text": json.dumps({
-                            "embeddings": [[0.1] * 1536]
-                        })
-                    }
-                ]
+                "content": [{"text": json.dumps({"embeddings": [[0.1] * 1536]})}]
             }
         }
         return response

     @pytest.mark.asyncio
-    async def test_generate_single_embedding(self, settings, mock_redis, mock_http_response):
+    async def test_generate_single_embedding(
+        self, settings, mock_redis, mock_http_response
+    ):
         """Test generating a single embedding."""
         from embeddings import EmbeddingGenerator
@@ -67,9 +63,9 @@ class TestEmbeddingGenerator:
             "result": {
                 "content": [
                     {
-                        "text": json.dumps({
-                            "embeddings": [[0.1] * 1536, [0.2] * 1536, [0.3] * 1536]
-                        })
+                        "text": json.dumps(
+                            {"embeddings": [[0.1] * 1536, [0.2] * 1536, [0.3] * 1536]}
+                        )
                     }
                 ]
             }
@@ -166,9 +162,11 @@ class TestEmbeddingGenerator:
             "result": {
                 "content": [
                     {
-                        "text": json.dumps({
-                            "embeddings": [[0.1] * 768]  # Wrong dimension
-                        })
+                        "text": json.dumps(
+                            {
+                                "embeddings": [[0.1] * 768]  # Wrong dimension
+                            }
+                        )
                     }
                 ]
             }
@@ -1,7 +1,6 @@
 """Tests for exception classes."""


-
 class TestErrorCode:
     """Tests for ErrorCode enum."""
@@ -10,8 +9,13 @@ class TestErrorCode:
         from exceptions import ErrorCode

         assert ErrorCode.UNKNOWN_ERROR.value == "KB_UNKNOWN_ERROR"
-        assert ErrorCode.DATABASE_CONNECTION_ERROR.value == "KB_DATABASE_CONNECTION_ERROR"
-        assert ErrorCode.EMBEDDING_GENERATION_ERROR.value == "KB_EMBEDDING_GENERATION_ERROR"
+        assert (
+            ErrorCode.DATABASE_CONNECTION_ERROR.value == "KB_DATABASE_CONNECTION_ERROR"
+        )
+        assert (
+            ErrorCode.EMBEDDING_GENERATION_ERROR.value
+            == "KB_EMBEDDING_GENERATION_ERROR"
+        )
         assert ErrorCode.CHUNKING_ERROR.value == "KB_CHUNKING_ERROR"
         assert ErrorCode.SEARCH_ERROR.value == "KB_SEARCH_ERROR"
         assert ErrorCode.COLLECTION_NOT_FOUND.value == "KB_COLLECTION_NOT_FOUND"
@@ -59,7 +59,9 @@ class TestSearchEngine:
         ]

     @pytest.mark.asyncio
-    async def test_semantic_search(self, search_engine, sample_search_request, sample_db_results):
+    async def test_semantic_search(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test semantic search."""
         from models import SearchType
@@ -74,7 +76,9 @@ class TestSearchEngine:
         search_engine._database.semantic_search.assert_called_once()

     @pytest.mark.asyncio
-    async def test_keyword_search(self, search_engine, sample_search_request, sample_db_results):
+    async def test_keyword_search(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test keyword search."""
         from models import SearchType
@@ -88,7 +92,9 @@ class TestSearchEngine:
         search_engine._database.keyword_search.assert_called_once()

     @pytest.mark.asyncio
-    async def test_hybrid_search(self, search_engine, sample_search_request, sample_db_results):
+    async def test_hybrid_search(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test hybrid search."""
         from models import SearchType
@@ -105,7 +111,9 @@ class TestSearchEngine:
         assert len(response.results) >= 1

     @pytest.mark.asyncio
-    async def test_search_with_collection_filter(self, search_engine, sample_search_request, sample_db_results):
+    async def test_search_with_collection_filter(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test search with collection filter."""
         from models import SearchType
@@ -120,7 +128,9 @@ class TestSearchEngine:
         assert call_args.kwargs["collection"] == "specific-collection"

     @pytest.mark.asyncio
-    async def test_search_with_file_type_filter(self, search_engine, sample_search_request, sample_db_results):
+    async def test_search_with_file_type_filter(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test search with file type filter."""
         from models import FileType, SearchType
@@ -135,7 +145,9 @@ class TestSearchEngine:
         assert call_args.kwargs["file_types"] == [FileType.PYTHON]

     @pytest.mark.asyncio
-    async def test_search_respects_limit(self, search_engine, sample_search_request, sample_db_results):
+    async def test_search_respects_limit(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test that search respects result limit."""
         from models import SearchType
@@ -148,7 +160,9 @@ class TestSearchEngine:
         assert len(response.results) <= 1

     @pytest.mark.asyncio
-    async def test_search_records_time(self, search_engine, sample_search_request, sample_db_results):
+    async def test_search_records_time(
+        self, search_engine, sample_search_request, sample_db_results
+    ):
         """Test that search records time."""
         from models import SearchType
@@ -203,13 +217,21 @@ class TestReciprocalRankFusion:
         from models import SearchResult

         semantic = [
-            SearchResult(id="a", content="A", score=0.9, chunk_type="code", collection="default"),
-            SearchResult(id="b", content="B", score=0.8, chunk_type="code", collection="default"),
+            SearchResult(
+                id="a", content="A", score=0.9, chunk_type="code", collection="default"
+            ),
+            SearchResult(
+                id="b", content="B", score=0.8, chunk_type="code", collection="default"
+            ),
         ]

         keyword = [
-            SearchResult(id="b", content="B", score=0.85, chunk_type="code", collection="default"),
-            SearchResult(id="c", content="C", score=0.7, chunk_type="code", collection="default"),
+            SearchResult(
+                id="b", content="B", score=0.85, chunk_type="code", collection="default"
+            ),
+            SearchResult(
+                id="c", content="C", score=0.7, chunk_type="code", collection="default"
+            ),
         ]

         fused = search_engine._reciprocal_rank_fusion(semantic, keyword)
@@ -230,19 +252,23 @@ class TestReciprocalRankFusion:

         # Same results in same order
         results = [
-            SearchResult(id="a", content="A", score=0.9, chunk_type="code", collection="default"),
+            SearchResult(
+                id="a", content="A", score=0.9, chunk_type="code", collection="default"
+            ),
         ]

         # High semantic weight
         fused_semantic_heavy = search_engine._reciprocal_rank_fusion(
-            results, [],
+            results,
+            [],
             semantic_weight=0.9,
             keyword_weight=0.1,
         )

         # High keyword weight
         fused_keyword_heavy = search_engine._reciprocal_rank_fusion(
-            [], results,
+            [],
+            results,
             semantic_weight=0.1,
             keyword_weight=0.9,
         )
@@ -256,12 +282,18 @@ class TestReciprocalRankFusion:
         from models import SearchResult

         semantic = [
-            SearchResult(id="a", content="A", score=0.9, chunk_type="code", collection="default"),
-            SearchResult(id="b", content="B", score=0.8, chunk_type="code", collection="default"),
+            SearchResult(
+                id="a", content="A", score=0.9, chunk_type="code", collection="default"
+            ),
+            SearchResult(
+                id="b", content="B", score=0.8, chunk_type="code", collection="default"
+            ),
         ]

         keyword = [
-            SearchResult(id="c", content="C", score=0.7, chunk_type="code", collection="default"),
+            SearchResult(
+                id="c", content="C", score=0.7, chunk_type="code", collection="default"
+            ),
         ]

         fused = search_engine._reciprocal_rank_fusion(semantic, keyword)
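The TestReciprocalRankFusion hunks above only reformat the test data, but for context on the helper they exercise: reciprocal rank fusion combines two ranked result lists by summing weighted 1/(k + rank) contributions, so items that appear in both lists float to the top. A minimal sketch with the same call shape as `_reciprocal_rank_fusion`; the default weights, the k = 60 constant, and the tie-breaking are assumptions, not the server's actual implementation:

```python
from typing import Any


def reciprocal_rank_fusion(
    semantic: list[Any],
    keyword: list[Any],
    semantic_weight: float = 0.5,
    keyword_weight: float = 0.5,
    k: int = 60,
) -> list[Any]:
    """Fuse two ranked result lists; results must expose a unique `.id`."""
    scores: dict[str, float] = {}
    by_id: dict[str, Any] = {}
    for weight, ranked in ((semantic_weight, semantic), (keyword_weight, keyword)):
        for rank, result in enumerate(ranked, start=1):
            by_id[result.id] = result
            scores[result.id] = scores.get(result.id, 0.0) + weight / (k + rank)
    # Highest fused score first; an id present in both lists accumulates twice.
    return [by_id[rid] for rid in sorted(scores, key=scores.get, reverse=True)]
```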