feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context-hash-based caching, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -9,6 +9,7 @@ from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any
from ..budget import TokenBudget, TokenCalculator
from ..config import ContextSettings, get_context_settings
from ..scoring.composite import CompositeScorer, ScoredContext
from ..types import BaseContext
@@ -45,6 +46,7 @@ class ContextRanker:
self,
scorer: CompositeScorer | None = None,
calculator: TokenCalculator | None = None,
settings: ContextSettings | None = None,
) -> None:
"""
Initialize context ranker.
@@ -52,7 +54,9 @@ class ContextRanker:
Args:
scorer: Composite scorer for scoring contexts
calculator: Token calculator for counting tokens
settings: Context settings (uses global if None)
"""
self._settings = settings or get_context_settings()
self._scorer = scorer or CompositeScorer()
self._calculator = calculator or TokenCalculator()
@@ -226,16 +230,32 @@ class ContextRanker:
"""
Ensure all contexts have token counts.
Counts tokens in parallel for contexts that don't have counts.
Args:
contexts: Contexts to check
model: Model for token counting
"""
for context in contexts:
if context.token_count is None:
count = await self._calculator.count_tokens(
context.content, model
)
context.token_count = count
import asyncio
# Find contexts needing counts
contexts_needing_counts = [
ctx for ctx in contexts if ctx.token_count is None
]
if not contexts_needing_counts:
return
# Count all in parallel
tasks = [
self._calculator.count_tokens(ctx.content, model)
for ctx in contexts_needing_counts
]
counts = await asyncio.gather(*tasks)
# Assign counts back
for ctx, count in zip(contexts_needing_counts, counts):
ctx.token_count = count
def _count_by_type(
self, scored_contexts: list[ScoredContext]
@@ -255,7 +275,7 @@ class ContextRanker:
async def rerank_for_diversity(
self,
scored_contexts: list[ScoredContext],
max_per_source: int = 3,
max_per_source: int | None = None,
) -> list[ScoredContext]:
"""
Rerank to ensure source diversity.
@@ -264,11 +284,18 @@ class ContextRanker:
Args:
scored_contexts: Already scored contexts
max_per_source: Maximum items per source
max_per_source: Maximum items per source (uses settings if None)
Returns:
Reranked contexts
"""
# Use provided value or fall back to settings
effective_max = (
max_per_source
if max_per_source is not None
else self._settings.diversity_max_per_source
)
source_counts: dict[str, int] = {}
result: list[ScoredContext] = []
deferred: list[ScoredContext] = []
@@ -277,7 +304,7 @@ class ContextRanker:
source = sc.context.source
current_count = source_counts.get(source, 0)
if current_count < max_per_source:
if current_count < effective_max:
result.append(sc)
source_counts[source] = current_count + 1
else: