refactor(knowledge-base mcp server): adjust formatting for consistency and readability
Reformatted line breaks and indentation in the Markdown chunking logic and several test modules so the code follows one consistent style. No functional changes were made.
This commit is contained in:
@@ -69,9 +69,7 @@ class MarkdownChunker(BaseChunker):
|
||||
|
||||
if not sections:
|
||||
# No headings, chunk as plain text
|
||||
return self._chunk_text_block(
|
||||
content, source_path, file_type, metadata, []
|
||||
)
|
||||
return self._chunk_text_block(content, source_path, file_type, metadata, [])
|
||||
|
||||
chunks: list[Chunk] = []
|
||||
heading_stack: list[tuple[int, str]] = [] # (level, text)
|
||||
@@ -292,7 +290,10 @@ class MarkdownChunker(BaseChunker):
|
||||
)
|
||||
|
||||
# Overlap: include last paragraph if it fits
|
||||
if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
|
||||
if (
|
||||
current_content
|
||||
and self.count_tokens(current_content[-1]) <= self.chunk_overlap
|
||||
):
|
||||
current_content = [current_content[-1]]
|
||||
current_tokens = self.count_tokens(current_content[-1])
|
||||
else:
|
||||
@@ -341,12 +342,14 @@ class MarkdownChunker(BaseChunker):
|
||||
# Start of code block - save previous paragraph
|
||||
if current_para and any(p.strip() for p in current_para):
|
||||
para_content = "\n".join(current_para)
|
||||
paragraphs.append({
|
||||
"content": para_content,
|
||||
"tokens": self.count_tokens(para_content),
|
||||
"start_line": para_start,
|
||||
"end_line": i - 1,
|
||||
})
|
||||
paragraphs.append(
|
||||
{
|
||||
"content": para_content,
|
||||
"tokens": self.count_tokens(para_content),
|
||||
"start_line": para_start,
|
||||
"end_line": i - 1,
|
||||
}
|
||||
)
|
||||
current_para = [line]
|
||||
para_start = i
|
||||
in_code_block = True
|
||||
@@ -360,12 +363,14 @@ class MarkdownChunker(BaseChunker):
|
||||
if not line.strip():
|
||||
if current_para and any(p.strip() for p in current_para):
|
||||
para_content = "\n".join(current_para)
|
||||
paragraphs.append({
|
||||
"content": para_content,
|
||||
"tokens": self.count_tokens(para_content),
|
||||
"start_line": para_start,
|
||||
"end_line": i - 1,
|
||||
})
|
||||
paragraphs.append(
|
||||
{
|
||||
"content": para_content,
|
||||
"tokens": self.count_tokens(para_content),
|
||||
"start_line": para_start,
|
||||
"end_line": i - 1,
|
||||
}
|
||||
)
|
||||
current_para = []
|
||||
para_start = i + 1
|
||||
else:
|
||||
@@ -376,12 +381,14 @@ class MarkdownChunker(BaseChunker):
|
||||
# Final paragraph
|
||||
if current_para and any(p.strip() for p in current_para):
|
||||
para_content = "\n".join(current_para)
|
||||
paragraphs.append({
|
||||
"content": para_content,
|
||||
"tokens": self.count_tokens(para_content),
|
||||
"start_line": para_start,
|
||||
"end_line": len(lines) - 1,
|
||||
})
|
||||
paragraphs.append(
|
||||
{
|
||||
"content": para_content,
|
||||
"tokens": self.count_tokens(para_content),
|
||||
"start_line": para_start,
|
||||
"end_line": len(lines) - 1,
|
||||
}
|
||||
)
|
||||
|
||||
return paragraphs
|
||||
|
||||
@@ -448,7 +455,10 @@ class MarkdownChunker(BaseChunker):
|
||||
)
|
||||
|
||||
# Overlap with last sentence
|
||||
if current_content and self.count_tokens(current_content[-1]) <= self.chunk_overlap:
|
||||
if (
|
||||
current_content
|
||||
and self.count_tokens(current_content[-1]) <= self.chunk_overlap
|
||||
):
|
||||
current_content = [current_content[-1]]
|
||||
current_tokens = self.count_tokens(current_content[-1])
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user