refactor(knowledge-base mcp server): adjust formatting for consistency and readability
Improve code formatting, line breaks, and indentation across the chunking logic and multiple test modules to enhance clarity and maintain a consistent style. No functional changes.
This commit is contained in:
@@ -79,9 +79,7 @@ class TextChunker(BaseChunker):
|
||||
)
|
||||
|
||||
# Fall back to sentence-based chunking
|
||||
return self._chunk_by_sentences(
|
||||
content, source_path, file_type, metadata
|
||||
)
|
||||
return self._chunk_by_sentences(content, source_path, file_type, metadata)
|
||||
|
||||
def _split_paragraphs(self, content: str) -> list[dict[str, Any]]:
|
||||
"""Split content into paragraphs."""
|
||||
@@ -97,12 +95,14 @@ class TextChunker(BaseChunker):
|
||||
continue
|
||||
|
||||
para_lines = para.count("\n") + 1
|
||||
paragraphs.append({
|
||||
"content": para,
|
||||
"tokens": self.count_tokens(para),
|
||||
"start_line": line_num,
|
||||
"end_line": line_num + para_lines - 1,
|
||||
})
|
||||
paragraphs.append(
|
||||
{
|
||||
"content": para,
|
||||
"tokens": self.count_tokens(para),
|
||||
"start_line": line_num,
|
||||
"end_line": line_num + para_lines - 1,
|
||||
}
|
||||
)
|
||||
line_num += para_lines + 1 # +1 for blank line between paragraphs
|
||||
|
||||
return paragraphs
|
||||
@@ -172,7 +172,10 @@ class TextChunker(BaseChunker):
|
||||
|
||||
# Overlap: keep last paragraph if small enough
|
||||
overlap_para = None
|
||||
if current_paras and self.count_tokens(current_paras[-1]) <= self.chunk_overlap:
|
||||
if (
|
||||
current_paras
|
||||
and self.count_tokens(current_paras[-1]) <= self.chunk_overlap
|
||||
):
|
||||
overlap_para = current_paras[-1]
|
||||
|
||||
current_paras = [overlap_para] if overlap_para else []
|
||||
@@ -266,7 +269,10 @@ class TextChunker(BaseChunker):
|
||||
|
||||
# Overlap: keep last sentence if small enough
|
||||
overlap = None
|
||||
if current_sentences and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap:
|
||||
if (
|
||||
current_sentences
|
||||
and self.count_tokens(current_sentences[-1]) <= self.chunk_overlap
|
||||
):
|
||||
overlap = current_sentences[-1]
|
||||
|
||||
current_sentences = [overlap] if overlap else []
|
||||
@@ -317,14 +323,10 @@ class TextChunker(BaseChunker):
|
||||
sentences = self._split_sentences(text)
|
||||
|
||||
if len(sentences) > 1:
|
||||
return self._chunk_by_sentences(
|
||||
text, source_path, file_type, metadata
|
||||
)
|
||||
return self._chunk_by_sentences(text, source_path, file_type, metadata)
|
||||
|
||||
# Fall back to word-based splitting
|
||||
return self._chunk_by_words(
|
||||
text, source_path, file_type, metadata, base_line
|
||||
)
|
||||
return self._chunk_by_words(text, source_path, file_type, metadata, base_line)
|
||||
|
||||
def _chunk_by_words(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user