forked from cardosofelipe/fast-next-template
feat(context): enhance performance, caching, and settings management
- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings); a sketch of such a settings object follows below.
- Optimize parallel execution of token counting, scoring, and source-diversity reranking.
- Improve caching logic (see the per-key lock sketch below):
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context-hash-based caching, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: updated all affected components with additional test cases.
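The settings object referenced throughout the diff is only partially visible in this file (the ranker reads `diversity_max_per_source`). Below is a minimal sketch of how such a settings container might be shaped, assuming a plain dataclass; every field other than `diversity_max_per_source` is illustrative, and the real `ContextSettings` in `..config` may differ.

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class ContextSettingsSketch:
    """Illustrative stand-in for the configurable limits named in the commit message."""

    # Only this field is visible in the diff (read by rerank_for_diversity).
    diversity_max_per_source: int = 3
    # Hypothetical fields for the other knobs the commit message mentions.
    cache_memory_size_mb: int = 64
    truncation_strategy: str = "middle"  # e.g. "start" | "middle" | "end"
    relevance_threshold: float = 0.0


# A module-level default mirrors the get_context_settings() fallback used below.
_DEFAULT_SETTINGS = ContextSettingsSketch()


def get_context_settings_sketch() -> ContextSettingsSketch:
    """Return process-wide default settings (stand-in for get_context_settings)."""
    return _DEFAULT_SETTINGS
```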
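The "per-context locks for safe parallel scoring" and fingerprint reuse are not part of this file's diff, so the following is only a generic sketch of that pattern: one `asyncio.Lock` per cache key, so concurrent scorers for the same fingerprint compute the value once while different fingerprints proceed in parallel. All names here are hypothetical, not the repository's implementation.

```python
import asyncio
from collections.abc import Awaitable, Callable


class PerKeyAsyncCache:
    """Generic per-key locking cache (illustrative only)."""

    def __init__(self) -> None:
        self._values: dict[str, float] = {}
        self._locks: dict[str, asyncio.Lock] = {}

    async def get_or_compute(
        self, fingerprint: str, compute: Callable[[], Awaitable[float]]
    ) -> float:
        # One lock per fingerprint: identical contexts serialize,
        # distinct ones run in parallel.
        lock = self._locks.setdefault(fingerprint, asyncio.Lock())
        async with lock:
            if fingerprint not in self._values:
                self._values[fingerprint] = await compute()
            return self._values[fingerprint]


async def _demo() -> None:
    cache = PerKeyAsyncCache()

    async def expensive_score() -> float:
        await asyncio.sleep(0)
        return 0.42

    # Two concurrent requests for the same fingerprint compute the score once.
    a, b = await asyncio.gather(
        cache.get_or_compute("ctx-fingerprint-1", expensive_score),
        cache.get_or_compute("ctx-fingerprint-1", expensive_score),
    )
    print(a, b)  # 0.42 0.42


if __name__ == "__main__":
    asyncio.run(_demo())
```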
@@ -9,6 +9,7 @@ from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any

 from ..budget import TokenBudget, TokenCalculator
+from ..config import ContextSettings, get_context_settings
 from ..scoring.composite import CompositeScorer, ScoredContext
 from ..types import BaseContext

@@ -45,6 +46,7 @@ class ContextRanker:
         self,
         scorer: CompositeScorer | None = None,
         calculator: TokenCalculator | None = None,
+        settings: ContextSettings | None = None,
     ) -> None:
         """
         Initialize context ranker.

@@ -52,7 +54,9 @@ class ContextRanker:
         Args:
             scorer: Composite scorer for scoring contexts
             calculator: Token calculator for counting tokens
+            settings: Context settings (uses global if None)
         """
+        self._settings = settings or get_context_settings()
         self._scorer = scorer or CompositeScorer()
         self._calculator = calculator or TokenCalculator()

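The new constructor follows the same "explicit argument or global default" pattern for all three collaborators. A usage sketch, assuming `ContextRanker` and `ContextSettings` are importable in your environment and that `ContextSettings` accepts keyword overrides (its constructor is not shown in this diff):

```python
# Default wiring: scorer, calculator, and settings all fall back to globals
# via CompositeScorer(), TokenCalculator(), and get_context_settings().
ranker = ContextRanker()

# Explicit wiring, e.g. for tests: pin the diversity cap instead of relying on
# the global settings. The field name matches the one read later in the diff
# (diversity_max_per_source); the constructor call itself is an assumption.
ranker = ContextRanker(settings=ContextSettings(diversity_max_per_source=2))
```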
@@ -226,16 +230,32 @@ class ContextRanker:
         """
         Ensure all contexts have token counts.

         Counts tokens in parallel for contexts that don't have counts.

         Args:
             contexts: Contexts to check
             model: Model for token counting
         """
-        for context in contexts:
-            if context.token_count is None:
-                count = await self._calculator.count_tokens(
-                    context.content, model
-                )
-                context.token_count = count
+        import asyncio
+
+        # Find contexts needing counts
+        contexts_needing_counts = [
+            ctx for ctx in contexts if ctx.token_count is None
+        ]
+
+        if not contexts_needing_counts:
+            return
+
+        # Count all in parallel
+        tasks = [
+            self._calculator.count_tokens(ctx.content, model)
+            for ctx in contexts_needing_counts
+        ]
+        counts = await asyncio.gather(*tasks)
+
+        # Assign counts back
+        for ctx, count in zip(contexts_needing_counts, counts):
+            ctx.token_count = count

     def _count_by_type(
         self, scored_contexts: list[ScoredContext]

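The hunk above replaces a sequential await-per-context loop with a single `asyncio.gather` over only the contexts that are missing counts. Here is a self-contained sketch of the same pattern; the `count_tokens(content, model)` shape is taken from the diff, everything else (`_Ctx`, the stand-in counter, the demo values) is illustrative.

```python
import asyncio
from dataclasses import dataclass


@dataclass
class _Ctx:
    content: str
    token_count: int | None = None


async def _count_tokens(content: str, model: str) -> int:
    # Stand-in for TokenCalculator.count_tokens (e.g. a tokenizer or API call).
    await asyncio.sleep(0)
    return len(content.split())


async def ensure_token_counts(contexts: list[_Ctx], model: str) -> None:
    missing = [ctx for ctx in contexts if ctx.token_count is None]
    if not missing:
        return
    # One task per missing context; gather preserves order, so zip is safe.
    counts = await asyncio.gather(
        *(_count_tokens(ctx.content, model) for ctx in missing)
    )
    for ctx, count in zip(missing, counts):
        ctx.token_count = count


async def _demo() -> None:
    ctxs = [_Ctx("alpha beta"), _Ctx("gamma", token_count=1), _Ctx("delta epsilon zeta")]
    await ensure_token_counts(ctxs, model="any-model")
    print([c.token_count for c in ctxs])  # [2, 1, 3]


if __name__ == "__main__":
    asyncio.run(_demo())
```

One detail worth noting, grounded in the diff itself: `asyncio.gather` returns results in task order, which is what makes the final `zip` assignment correct.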
@@ -255,7 +275,7 @@ class ContextRanker:
     async def rerank_for_diversity(
         self,
         scored_contexts: list[ScoredContext],
-        max_per_source: int = 3,
+        max_per_source: int | None = None,
     ) -> list[ScoredContext]:
         """
         Rerank to ensure source diversity.

@@ -264,11 +284,18 @@ class ContextRanker:

         Args:
             scored_contexts: Already scored contexts
-            max_per_source: Maximum items per source
+            max_per_source: Maximum items per source (uses settings if None)

         Returns:
             Reranked contexts
         """
+        # Use provided value or fall back to settings
+        effective_max = (
+            max_per_source
+            if max_per_source is not None
+            else self._settings.diversity_max_per_source
+        )
+
         source_counts: dict[str, int] = {}
         result: list[ScoredContext] = []
         deferred: list[ScoredContext] = []

@@ -277,7 +304,7 @@ class ContextRanker:
             source = sc.context.source
             current_count = source_counts.get(source, 0)

-            if current_count < max_per_source:
+            if current_count < effective_max:
                 result.append(sc)
                 source_counts[source] = current_count + 1
             else:
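The last two hunks cap how many items each source may contribute and route the overflow into `deferred`. The diff ends before showing what happens to `deferred`, so the tail of the sketch below (appending deferred items after the capped head) is an assumption, not the file's confirmed behavior; `_Scored` is a simplified stand-in for `ScoredContext`.

```python
from dataclasses import dataclass


@dataclass
class _Scored:
    source: str
    score: float


def rerank_for_diversity(scored: list[_Scored], max_per_source: int) -> list[_Scored]:
    """Keep at most max_per_source items per source up front; defer the rest."""
    source_counts: dict[str, int] = {}
    result: list[_Scored] = []
    deferred: list[_Scored] = []

    for sc in scored:  # assumed to arrive already sorted by score
        current = source_counts.get(sc.source, 0)
        if current < max_per_source:
            result.append(sc)
            source_counts[sc.source] = current + 1
        else:
            deferred.append(sc)

    # Assumption: overflow items keep their relative order after the diverse head.
    return result + deferred


items = [_Scored("docs", 0.9), _Scored("docs", 0.8), _Scored("code", 0.7), _Scored("docs", 0.6)]
print([i.source for i in rerank_for_diversity(items, max_per_source=2)])
# ['docs', 'docs', 'code', 'docs']
```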