refactor(knowledge-base mcp server): adjust formatting for consistency and readability

Improved code formatting, line breaks, and indentation across chunking logic and multiple test modules to enhance code clarity and maintain consistent style. No functional changes made.
This commit is contained in:
2026-01-06 17:20:31 +01:00
parent 3f23bc3db3
commit 51404216ae
15 changed files with 306 additions and 155 deletions

View File

@@ -79,9 +79,7 @@ class TextChunker(BaseChunker):
)
# Fall back to sentence-based chunking
return self._chunk_by_sentences(
content, source_path, file_type, metadata
)
return self._chunk_by_sentences(content, source_path, file_type, metadata)
def _split_paragraphs(self, content: str) -> list[dict[str, Any]]:
"""Split content into paragraphs."""
@@ -97,12 +95,14 @@ class TextChunker(BaseChunker):
continue
para_lines = para.count("\n") + 1
paragraphs.append({
"content": para,
"tokens": self.count_tokens(para),
"start_line": line_num,
"end_line": line_num + para_lines - 1,
})
paragraphs.append(
{
"content": para,
"tokens": self.count_tokens(para),
"start_line": line_num,
"end_line": line_num + para_lines - 1,
}
)
line_num += para_lines + 1 # +1 for blank line between paragraphs
return paragraphs
@@ -172,7 +172,10 @@ class TextChunker(BaseChunker):
# Overlap: keep last paragraph if small enough
overlap_para = None
if current_paras and self.count_tokens(current_paras[-1]) <= self.chunk_overlap:
if (
current_paras
and self.count_tokens(current_paras[-1]) <= self.chunk_overlap
):
overlap_para = current_paras[-1]
current_paras = [overlap_para] if overlap_para else []
@@ -266,7 +269,10 @@ class TextChunker(BaseChunker):
# Overlap: keep last sentence if small enough
overlap = None
if current_sentences and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap:
if (
current_sentences
and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap
):
overlap = current_sentences[-1]
current_sentences = [overlap] if overlap else []
@@ -317,14 +323,10 @@ class TextChunker(BaseChunker):
sentences = self._split_sentences(text)
if len(sentences) > 1:
return self._chunk_by_sentences(
text, source_path, file_type, metadata
)
return self._chunk_by_sentences(text, source_path, file_type, metadata)
# Fall back to word-based splitting
return self._chunk_by_words(
text, source_path, file_type, metadata, base_line
)
return self._chunk_by_words(text, source_path, file_type, metadata, base_line)
def _chunk_by_words(
self,