chore(context): refactor for consistency, optimize formatting, and simplify logic

- Cleaned up unnecessary comments in `__all__` definitions for better readability.
- Adjusted indentation and formatting across modules for improved clarity (e.g., long lines, logical grouping).
- Simplified conditional expressions and inline comments for context scoring and ranking.
- Replaced some hard-coded values with type-safe annotations (e.g., `ClassVar`).
- Removed unused imports and ensured consistent usage across test files.
- Updated `test_score_not_cached_on_context` to clarify caching behavior.
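- Improved truncation strategy logic and marker handling (e.g., renamed the `TRUNCATION_MARKER` property to snake_case `truncation_marker` and updated its call sites).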
This commit is contained in:
2026-01-04 15:23:14 +01:00
parent 9e54f16e56
commit 2bea057fb1
26 changed files with 226 additions and 273 deletions

View File

@@ -78,7 +78,7 @@ class TruncationStrategy:
)
@property
def TRUNCATION_MARKER(self) -> str:
def truncation_marker(self) -> str:
"""Get truncation marker from settings."""
return self._settings.truncation_marker
@@ -141,7 +141,9 @@ class TruncationStrategy:
truncated_tokens=truncated_tokens,
content=truncated,
truncated=True,
truncation_ratio=0.0 if original_tokens == 0 else 1 - (truncated_tokens / original_tokens),
truncation_ratio=0.0
if original_tokens == 0
else 1 - (truncated_tokens / original_tokens),
)
async def _truncate_end(
@@ -156,17 +158,17 @@ class TruncationStrategy:
Simple but effective for most content types.
"""
# Binary search for optimal truncation point
marker_tokens = await self._count_tokens(self.TRUNCATION_MARKER, model)
marker_tokens = await self._count_tokens(self.truncation_marker, model)
available_tokens = max(0, max_tokens - marker_tokens)
# Edge case: if no tokens available for content, return just the marker
if available_tokens <= 0:
return self.TRUNCATION_MARKER
return self.truncation_marker
# Estimate characters per token (guard against division by zero)
content_tokens = await self._count_tokens(content, model)
if content_tokens == 0:
return content + self.TRUNCATION_MARKER
return content + self.truncation_marker
chars_per_token = len(content) / content_tokens
# Start with estimated position
@@ -188,7 +190,7 @@ class TruncationStrategy:
else:
high = mid - 1
return best + self.TRUNCATION_MARKER
return best + self.truncation_marker
async def _truncate_middle(
self,
@@ -201,7 +203,7 @@ class TruncationStrategy:
Good for code or content where context at boundaries matters.
"""
marker_tokens = await self._count_tokens(self.TRUNCATION_MARKER, model)
marker_tokens = await self._count_tokens(self.truncation_marker, model)
available_tokens = max_tokens - marker_tokens
# Split between start and end
@@ -218,7 +220,7 @@ class TruncationStrategy:
content, end_tokens, from_start=False, model=model
)
return start_content + self.TRUNCATION_MARKER + end_content
return start_content + self.truncation_marker + end_content
async def _truncate_sentence(
self,
@@ -236,7 +238,7 @@ class TruncationStrategy:
result: list[str] = []
total_tokens = 0
marker_tokens = await self._count_tokens(self.TRUNCATION_MARKER, model)
marker_tokens = await self._count_tokens(self.truncation_marker, model)
available = max_tokens - marker_tokens
for sentence in sentences:
@@ -248,7 +250,7 @@ class TruncationStrategy:
break
if len(result) < len(sentences):
return " ".join(result) + self.TRUNCATION_MARKER
return " ".join(result) + self.truncation_marker
return " ".join(result)
async def _get_content_for_tokens(