refactor(knowledge-base mcp server): adjust formatting for consistency and readability

Reformat code, adjusting line breaks and indentation across the chunking logic and multiple test modules, to improve clarity and keep the style consistent. No functional changes.
2026-01-06 17:20:31 +01:00
parent 3f23bc3db3
commit 51404216ae
15 changed files with 306 additions and 155 deletions
--- a/mcp-servers/knowledge-base/chunking/base.py
+++ b/mcp-servers/knowledge-base/chunking/base.py
@@ -184,7 +184,12 @@ class ChunkerFactory:
        if file_type:
            if file_type == FileType.MARKDOWN:
                return self._get_markdown_chunker()
-            elif file_type in (FileType.TEXT, FileType.JSON, FileType.YAML, FileType.TOML):
+            elif file_type in (
+                FileType.TEXT,
+                FileType.JSON,
+                FileType.YAML,
+                FileType.TOML,
+            ):
                return self._get_text_chunker()
            else:
                # Code files
@@ -193,7 +198,9 @@ class ChunkerFactory:
        # Default to text chunker
        return self._get_text_chunker()

-    def get_chunker_for_path(self, source_path: str) -> tuple[BaseChunker, FileType | None]:
+    def get_chunker_for_path(
+        self, source_path: str
+    ) -> tuple[BaseChunker, FileType | None]:
        """
        Get chunker based on file path extension.

--- a/mcp-servers/knowledge-base/chunking/code.py
+++ b/mcp-servers/knowledge-base/chunking/code.py
@@ -151,7 +151,7 @@ class CodeChunker(BaseChunker):
        for struct_type, pattern in patterns.items():
            for match in pattern.finditer(content):
                # Convert character position to line number
-                line_num = content[:match.start()].count("\n")
+                line_num = content[: match.start()].count("\n")
                boundaries.append((line_num, struct_type))

        if not boundaries:
--- a/mcp-servers/knowledge-base/chunking/markdown.py
+++ b/mcp-servers/knowledge-base/chunking/markdown.py
@@ -69,9 +69,7 @@ class MarkdownChunker(BaseChunker):

        if not sections:
            # No headings, chunk as plain text
-            return self._chunk_text_block(
-                content, source_path, file_type, metadata, []
-            )
+            return self._chunk_text_block(content, source_path, file_type, metadata, [])

        chunks: list[Chunk] = []
        heading_stack: list[tuple[int, str]] = []  # (level, text)
@@ -292,7 +290,10 @@ class MarkdownChunker(BaseChunker):
                )

                # Overlap: include last paragraph if it fits
-                if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
+                if (
+                    current_content
+                    and self.count_tokens(current_content[-1]) <= self.chunk_overlap
+                ):
                    current_content = [current_content[-1]]
                    current_tokens = self.count_tokens(current_content[-1])
                else:
@@ -341,12 +342,14 @@ class MarkdownChunker(BaseChunker):
                    # Start of code block - save previous paragraph
                    if current_para and any(p.strip() for p in current_para):
                        para_content = "\n".join(current_para)
-                        paragraphs.append({
-                            "content": para_content,
-                            "tokens": self.count_tokens(para_content),
-                            "start_line": para_start,
-                            "end_line": i - 1,
-                        })
+                        paragraphs.append(
+                            {
+                                "content": para_content,
+                                "tokens": self.count_tokens(para_content),
+                                "start_line": para_start,
+                                "end_line": i - 1,
+                            }
+                        )
                    current_para = [line]
                    para_start = i
                    in_code_block = True
@@ -360,12 +363,14 @@ class MarkdownChunker(BaseChunker):
            if not line.strip():
                if current_para and any(p.strip() for p in current_para):
                    para_content = "\n".join(current_para)
-                    paragraphs.append({
-                        "content": para_content,
-                        "tokens": self.count_tokens(para_content),
-                        "start_line": para_start,
-                        "end_line": i - 1,
-                    })
+                    paragraphs.append(
+                        {
+                            "content": para_content,
+                            "tokens": self.count_tokens(para_content),
+                            "start_line": para_start,
+                            "end_line": i - 1,
+                        }
+                    )
                current_para = []
                para_start = i + 1
            else:
@@ -376,12 +381,14 @@ class MarkdownChunker(BaseChunker):
        # Final paragraph
        if current_para and any(p.strip() for p in current_para):
            para_content = "\n".join(current_para)
-            paragraphs.append({
-                "content": para_content,
-                "tokens": self.count_tokens(para_content),
-                "start_line": para_start,
-                "end_line": len(lines) - 1,
-            })
+            paragraphs.append(
+                {
+                    "content": para_content,
+                    "tokens": self.count_tokens(para_content),
+                    "start_line": para_start,
+                    "end_line": len(lines) - 1,
+                }
+            )

        return paragraphs

@@ -448,7 +455,10 @@ class MarkdownChunker(BaseChunker):
                )

                # Overlap with last sentence
-                if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
+                if (
+                    current_content
+                    and self.count_tokens(current_content[-1]) <= self.chunk_overlap
+                ):
                    current_content = [current_content[-1]]
                    current_tokens = self.count_tokens(current_content[-1])
                else:
--- a/mcp-servers/knowledge-base/chunking/text.py
+++ b/mcp-servers/knowledge-base/chunking/text.py
@@ -79,9 +79,7 @@ class TextChunker(BaseChunker):
            )

        # Fall back to sentence-based chunking
-        return self._chunk_by_sentences(
-            content, source_path, file_type, metadata
-        )
+        return self._chunk_by_sentences(content, source_path, file_type, metadata)

    def _split_paragraphs(self, content: str) -> list[dict[str, Any]]:
        """Split content into paragraphs."""
@@ -97,12 +95,14 @@ class TextChunker(BaseChunker):
                continue

            para_lines = para.count("\n") + 1
-            paragraphs.append({
-                "content": para,
-                "tokens": self.count_tokens(para),
-                "start_line": line_num,
-                "end_line": line_num + para_lines - 1,
-            })
+            paragraphs.append(
+                {
+                    "content": para,
+                    "tokens": self.count_tokens(para),
+                    "start_line": line_num,
+                    "end_line": line_num + para_lines - 1,
+                }
+            )
            line_num += para_lines + 1  # +1 for blank line between paragraphs

        return paragraphs
@@ -172,7 +172,10 @@ class TextChunker(BaseChunker):

                # Overlap: keep last paragraph if small enough
                overlap_para = None
-                if current_paras and self.count_tokens(current_paras[-1]) <= self.chunk_overlap:
+                if (
+                    current_paras
+                    and self.count_tokens(current_paras[-1]) <= self.chunk_overlap
+                ):
                    overlap_para = current_paras[-1]

                current_paras = [overlap_para] if overlap_para else []
@@ -266,7 +269,10 @@ class TextChunker(BaseChunker):

                # Overlap: keep last sentence if small enough
                overlap = None
-                if current_sentences and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap:
+                if (
+                    current_sentences
+                    and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap
+                ):
                    overlap = current_sentences[-1]

                current_sentences = [overlap] if overlap else []
@@ -317,14 +323,10 @@ class TextChunker(BaseChunker):
        sentences = self._split_sentences(text)

        if len(sentences) > 1:
-            return self._chunk_by_sentences(
-                text, source_path, file_type, metadata
-            )
+            return self._chunk_by_sentences(text, source_path, file_type, metadata)

        # Fall back to word-based splitting
-        return self._chunk_by_words(
-            text, source_path, file_type, metadata, base_line
-        )
+        return self._chunk_by_words(text, source_path, file_type, metadata, base_line)

    def _chunk_by_words(
        self,