forked from cardosofelipe/fast-next-template
feat(context): enhance performance, caching, and settings management
- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
@@ -10,6 +10,7 @@ import re
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ..config import ContextSettings, get_context_settings
|
||||
from ..types import BaseContext, ContextType
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -45,26 +46,41 @@ class TruncationStrategy:
|
||||
4. Semantic chunking: Keep most relevant chunks
|
||||
"""
|
||||
|
||||
# Default truncation marker
|
||||
TRUNCATION_MARKER = "\n\n[...content truncated...]\n\n"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
calculator: "TokenCalculator | None" = None,
|
||||
preserve_ratio_start: float = 0.7, # Keep 70% from start by default
|
||||
min_content_length: int = 100, # Minimum characters to keep
|
||||
preserve_ratio_start: float | None = None,
|
||||
min_content_length: int | None = None,
|
||||
settings: ContextSettings | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize truncation strategy.
|
||||
|
||||
Args:
|
||||
calculator: Token calculator for accurate counting
|
||||
preserve_ratio_start: Ratio of content to keep from start
|
||||
min_content_length: Minimum characters to preserve
|
||||
preserve_ratio_start: Ratio of content to keep from start (overrides settings)
|
||||
min_content_length: Minimum characters to preserve (overrides settings)
|
||||
settings: Context settings (uses global if None)
|
||||
"""
|
||||
self._settings = settings or get_context_settings()
|
||||
self._calculator = calculator
|
||||
self._preserve_ratio_start = preserve_ratio_start
|
||||
self._min_content_length = min_content_length
|
||||
|
||||
# Use provided values or fall back to settings
|
||||
self._preserve_ratio_start = (
|
||||
preserve_ratio_start
|
||||
if preserve_ratio_start is not None
|
||||
else self._settings.truncation_preserve_ratio
|
||||
)
|
||||
self._min_content_length = (
|
||||
min_content_length
|
||||
if min_content_length is not None
|
||||
else self._settings.truncation_min_content_length
|
||||
)
|
||||
|
||||
@property
|
||||
def TRUNCATION_MARKER(self) -> str:
|
||||
"""Get truncation marker from settings."""
|
||||
return self._settings.truncation_marker
|
||||
|
||||
def set_calculator(self, calculator: "TokenCalculator") -> None:
|
||||
"""Set token calculator."""
|
||||
@@ -125,7 +141,7 @@ class TruncationStrategy:
|
||||
truncated_tokens=truncated_tokens,
|
||||
content=truncated,
|
||||
truncated=True,
|
||||
truncation_ratio=1 - (truncated_tokens / original_tokens),
|
||||
truncation_ratio=0.0 if original_tokens == 0 else 1 - (truncated_tokens / original_tokens),
|
||||
)
|
||||
|
||||
async def _truncate_end(
|
||||
@@ -141,10 +157,17 @@ class TruncationStrategy:
|
||||
"""
|
||||
# Binary search for optimal truncation point
|
||||
marker_tokens = await self._count_tokens(self.TRUNCATION_MARKER, model)
|
||||
available_tokens = max_tokens - marker_tokens
|
||||
available_tokens = max(0, max_tokens - marker_tokens)
|
||||
|
||||
# Estimate characters per token
|
||||
chars_per_token = len(content) / await self._count_tokens(content, model)
|
||||
# Edge case: if no tokens available for content, return just the marker
|
||||
if available_tokens <= 0:
|
||||
return self.TRUNCATION_MARKER
|
||||
|
||||
# Estimate characters per token (guard against division by zero)
|
||||
content_tokens = await self._count_tokens(content, model)
|
||||
if content_tokens == 0:
|
||||
return content + self.TRUNCATION_MARKER
|
||||
chars_per_token = len(content) / content_tokens
|
||||
|
||||
# Start with estimated position
|
||||
estimated_chars = int(available_tokens * chars_per_token)
|
||||
@@ -243,7 +266,9 @@ class TruncationStrategy:
|
||||
if current_tokens <= target_tokens:
|
||||
return content
|
||||
|
||||
# Estimate characters
|
||||
# Estimate characters (guard against division by zero)
|
||||
if current_tokens == 0:
|
||||
return content
|
||||
chars_per_token = len(content) / current_tokens
|
||||
estimated_chars = int(target_tokens * chars_per_token)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user