refactor(knowledge-base mcp server): adjust formatting for consistency and readability

Improved code formatting, line breaks, and indentation across chunking logic and multiple test modules to enhance code clarity and maintain consistent style. No functional changes made.
2026-01-06 17:20:31 +01:00
parent 3f23bc3db3
commit 51404216ae
15 changed files with 306 additions and 155 deletions
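
The hunks below all make one of two mechanical changes: an expression that ran past the line-length limit is wrapped inside parentheses, one element per line with a trailing comma, or a short multi-line call is collapsed back onto a single line. This is the shape an auto-formatter such as Black or Ruff produces, though the commit does not say which tool, if any, was used. In miniature:

# Before: one long condition on a single line
if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
    ...

# After: the same condition wrapped in parentheses, one clause per line
if (
    current_content
    and self.count_tokens(current_content[-1]) <= self.chunk_overlap
):
    ...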

View File

@@ -184,7 +184,12 @@ class ChunkerFactory:
         if file_type:
             if file_type == FileType.MARKDOWN:
                 return self._get_markdown_chunker()
-            elif file_type in (FileType.TEXT, FileType.JSON, FileType.YAML, FileType.TOML):
+            elif file_type in (
+                FileType.TEXT,
+                FileType.JSON,
+                FileType.YAML,
+                FileType.TOML,
+            ):
                 return self._get_text_chunker()
             else:
                 # Code files
@@ -193,7 +198,9 @@ class ChunkerFactory:
         # Default to text chunker
         return self._get_text_chunker()
 
-    def get_chunker_for_path(self, source_path: str) -> tuple[BaseChunker, FileType | None]:
+    def get_chunker_for_path(
+        self, source_path: str
+    ) -> tuple[BaseChunker, FileType | None]:
         """
         Get chunker based on file path extension.
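
For context, the dispatch above routes Markdown to the Markdown-aware chunker, the plain-data formats (text, JSON, YAML, TOML) to the text chunker, and everything else to the code chunker, while get_chunker_for_path also reports the detected file type. A hypothetical usage sketch — the ChunkerFactory constructor is assumed to take no required arguments, which this diff does not show:

factory = ChunkerFactory()  # constructor arguments, if any, are not shown in this diff
chunker, file_type = factory.get_chunker_for_path("docs/setup.md")
# file_type == FileType.MARKDOWN, chunker is the Markdown chunker
chunker, file_type = factory.get_chunker_for_path("config.toml")
# file_type == FileType.TOML, chunker is the text chunker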

View File

@@ -151,7 +151,7 @@ class CodeChunker(BaseChunker):
         for struct_type, pattern in patterns.items():
             for match in pattern.finditer(content):
                 # Convert character position to line number
-                line_num = content[:match.start()].count("\n")
+                line_num = content[: match.start()].count("\n")
                 boundaries.append((line_num, struct_type))
 
         if not boundaries:
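
The only change here is formatter-style spacing inside the slice, but the idiom is worth spelling out: counting the newlines in the prefix before the match gives the 0-based line number on which the match starts. A standalone sketch (the regex and sample text are illustrative, not taken from the chunker):

import re

content = "def a():\n    pass\n\nclass B:\n    pass\n"
pattern = re.compile(r"^class \w+", re.MULTILINE)
match = pattern.search(content)
# Newlines before the match start == complete lines above it == 0-based line index
line_num = content[: match.start()].count("\n")
print(line_num)  # 3 -> "class B" sits on the fourth line of the sample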

View File

@@ -69,9 +69,7 @@ class MarkdownChunker(BaseChunker):
 
         if not sections:
             # No headings, chunk as plain text
-            return self._chunk_text_block(
-                content, source_path, file_type, metadata, []
-            )
+            return self._chunk_text_block(content, source_path, file_type, metadata, [])
 
         chunks: list[Chunk] = []
         heading_stack: list[tuple[int, str]] = []  # (level, text)
@@ -292,7 +290,10 @@ class MarkdownChunker(BaseChunker):
                 )
 
                 # Overlap: include last paragraph if it fits
-                if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
+                if (
+                    current_content
+                    and self.count_tokens(current_content[-1]) <= self.chunk_overlap
+                ):
                     current_content = [current_content[-1]]
                     current_tokens = self.count_tokens(current_content[-1])
                 else:
@@ -341,12 +342,14 @@ class MarkdownChunker(BaseChunker):
                 # Start of code block - save previous paragraph
                 if current_para and any(p.strip() for p in current_para):
                     para_content = "\n".join(current_para)
-                    paragraphs.append({
-                        "content": para_content,
-                        "tokens": self.count_tokens(para_content),
-                        "start_line": para_start,
-                        "end_line": i - 1,
-                    })
+                    paragraphs.append(
+                        {
+                            "content": para_content,
+                            "tokens": self.count_tokens(para_content),
+                            "start_line": para_start,
+                            "end_line": i - 1,
+                        }
+                    )
                 current_para = [line]
                 para_start = i
                 in_code_block = True
@@ -360,12 +363,14 @@ class MarkdownChunker(BaseChunker):
                 if not line.strip():
                     if current_para and any(p.strip() for p in current_para):
                         para_content = "\n".join(current_para)
-                        paragraphs.append({
-                            "content": para_content,
-                            "tokens": self.count_tokens(para_content),
-                            "start_line": para_start,
-                            "end_line": i - 1,
-                        })
+                        paragraphs.append(
+                            {
+                                "content": para_content,
+                                "tokens": self.count_tokens(para_content),
+                                "start_line": para_start,
+                                "end_line": i - 1,
+                            }
+                        )
                     current_para = []
                     para_start = i + 1
                 else:
@@ -376,12 +381,14 @@ class MarkdownChunker(BaseChunker):
 
         # Final paragraph
         if current_para and any(p.strip() for p in current_para):
             para_content = "\n".join(current_para)
-            paragraphs.append({
-                "content": para_content,
-                "tokens": self.count_tokens(para_content),
-                "start_line": para_start,
-                "end_line": len(lines) - 1,
-            })
+            paragraphs.append(
+                {
+                    "content": para_content,
+                    "tokens": self.count_tokens(para_content),
+                    "start_line": para_start,
+                    "end_line": len(lines) - 1,
+                }
+            )
 
         return paragraphs
@@ -448,7 +455,10 @@ class MarkdownChunker(BaseChunker):
                 )
 
                 # Overlap with last sentence
-                if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
+                if (
+                    current_content
+                    and self.count_tokens(current_content[-1]) <= self.chunk_overlap
+                ):
                     current_content = [current_content[-1]]
                     current_tokens = self.count_tokens(current_content[-1])
                 else:
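
Both reformatted conditions in this file implement the same overlap rule: when a chunk is flushed, its last paragraph (or sentence) is carried into the next chunk only if it fits within the overlap budget. A simplified, self-contained sketch of that accumulate-and-flush loop — count_tokens, chunk_size, and chunk_overlap stand in for the corresponding BaseChunker attributes, and the join separator is an assumption:

def split_with_overlap(paragraphs, count_tokens, chunk_size, chunk_overlap):
    chunks: list[str] = []
    current: list[str] = []
    current_tokens = 0
    for para in paragraphs:
        tokens = count_tokens(para)
        if current and current_tokens + tokens > chunk_size:
            chunks.append("\n\n".join(current))
            # Overlap: carry the last paragraph forward only if it fits the budget
            if count_tokens(current[-1]) <= chunk_overlap:
                current = [current[-1]]
                current_tokens = count_tokens(current[-1])
            else:
                current = []
                current_tokens = 0
        current.append(para)
        current_tokens += tokens
    if current:
        chunks.append("\n\n".join(current))
    return chunks

# e.g. split_with_overlap(paras, lambda s: len(s.split()), chunk_size=200, chunk_overlap=40)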

View File

@@ -79,9 +79,7 @@ class TextChunker(BaseChunker):
             )
 
         # Fall back to sentence-based chunking
-        return self._chunk_by_sentences(
-            content, source_path, file_type, metadata
-        )
+        return self._chunk_by_sentences(content, source_path, file_type, metadata)
 
     def _split_paragraphs(self, content: str) -> list[dict[str, Any]]:
         """Split content into paragraphs."""
@@ -97,12 +95,14 @@ class TextChunker(BaseChunker):
                 continue
 
             para_lines = para.count("\n") + 1
-            paragraphs.append({
-                "content": para,
-                "tokens": self.count_tokens(para),
-                "start_line": line_num,
-                "end_line": line_num + para_lines - 1,
-            })
+            paragraphs.append(
+                {
+                    "content": para,
+                    "tokens": self.count_tokens(para),
+                    "start_line": line_num,
+                    "end_line": line_num + para_lines - 1,
+                }
+            )
             line_num += para_lines + 1  # +1 for blank line between paragraphs
 
         return paragraphs
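
The reformatted block above is the paragraph splitter's bookkeeping: each blank-line-separated block is recorded with its token count and its start and end line numbers. A minimal standalone version of that bookkeeping — count_tokens stands in for the BaseChunker method, and runs of more than one blank line are not handled as carefully as in the real method:

from collections.abc import Callable
from typing import Any

def split_paragraphs(content: str, count_tokens: Callable[[str], int]) -> list[dict[str, Any]]:
    paragraphs: list[dict[str, Any]] = []
    line_num = 0
    for para in content.split("\n\n"):
        if not para.strip():
            continue
        para_lines = para.count("\n") + 1
        paragraphs.append(
            {
                "content": para,
                "tokens": count_tokens(para),
                "start_line": line_num,
                "end_line": line_num + para_lines - 1,
            }
        )
        line_num += para_lines + 1  # +1 for the blank line between paragraphs
    return paragraphs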
@@ -172,7 +172,10 @@ class TextChunker(BaseChunker):
                 # Overlap: keep last paragraph if small enough
                 overlap_para = None
-                if current_paras and self.count_tokens(current_paras[-1]) <= self.chunk_overlap:
+                if (
+                    current_paras
+                    and self.count_tokens(current_paras[-1]) <= self.chunk_overlap
+                ):
                     overlap_para = current_paras[-1]
 
                 current_paras = [overlap_para] if overlap_para else []
@@ -266,7 +269,10 @@ class TextChunker(BaseChunker):
                 # Overlap: keep last sentence if small enough
                 overlap = None
-                if current_sentences and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap:
+                if (
+                    current_sentences
+                    and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap
+                ):
                     overlap = current_sentences[-1]
 
                 current_sentences = [overlap] if overlap else []
@@ -317,14 +323,10 @@ class TextChunker(BaseChunker):
 
         sentences = self._split_sentences(text)
         if len(sentences) > 1:
-            return self._chunk_by_sentences(
-                text, source_path, file_type, metadata
-            )
+            return self._chunk_by_sentences(text, source_path, file_type, metadata)
 
         # Fall back to word-based splitting
-        return self._chunk_by_words(
-            text, source_path, file_type, metadata, base_line
-        )
+        return self._chunk_by_words(text, source_path, file_type, metadata, base_line)
 
     def _chunk_by_words(
         self,