feat(context): enhance performance, caching, and settings management

- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, hash-based context fingerprinting, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
2026-01-04 12:37:58 +01:00
parent 6c7b72f130
commit 96e6400bd8
8 changed files with 256 additions and 86 deletions

View File

@@ -8,6 +8,7 @@ import asyncio
import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from weakref import WeakValueDictionary
from ..config import ContextSettings, get_context_settings
from ..types import BaseContext
@@ -89,6 +90,11 @@ class CompositeScorer:
self._recency_scorer = RecencyScorer(weight=self._recency_weight)
self._priority_scorer = PriorityScorer(weight=self._priority_weight)
# Per-context locks to prevent race conditions during parallel scoring
# Uses WeakValueDictionary so locks are garbage collected when not in use
self._context_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
self._locks_lock = asyncio.Lock() # Lock to protect _context_locks access
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
    """Attach the MCP client manager used for semantic relevance scoring."""
    # The relevance scorer owns the semantic-scoring integration, so the
    # manager is simply forwarded to it.
    target = self._relevance_scorer
    target.set_mcp_manager(mcp_manager)
@@ -128,6 +134,38 @@ class CompositeScorer:
self._priority_weight = max(0.0, min(1.0, priority))
self._priority_scorer.weight = self._priority_weight
async def _get_context_lock(self, context_id: str) -> asyncio.Lock:
"""
Get or create a lock for a specific context.
Thread-safe access to per-context locks prevents race conditions
when the same context is scored concurrently.
Args:
context_id: The context ID to get a lock for
Returns:
asyncio.Lock for the context
"""
# Fast path: check if lock exists without acquiring main lock
if context_id in self._context_locks:
lock = self._context_locks.get(context_id)
if lock is not None:
return lock
# Slow path: create lock while holding main lock
async with self._locks_lock:
# Double-check after acquiring lock
if context_id in self._context_locks:
lock = self._context_locks.get(context_id)
if lock is not None:
return lock
# Create new lock
new_lock = asyncio.Lock()
self._context_locks[context_id] = new_lock
return new_lock
async def score(
self,
context: BaseContext,
@@ -157,6 +195,9 @@ class CompositeScorer:
"""
Compute composite score with individual scores.
Uses per-context locking to prevent race conditions when the same
context is scored concurrently in parallel scoring operations.
Args:
context: Context to score
query: Query to score against
@@ -165,46 +206,50 @@ class CompositeScorer:
Returns:
ScoredContext with all scores
"""
# Check if context already has a score
if context._score is not None:
return ScoredContext(
context=context,
composite_score=context._score,
# Get lock for this specific context to prevent race conditions
context_lock = await self._get_context_lock(context.id)
async with context_lock:
# Check if context already has a score (inside lock to prevent races)
if context._score is not None:
return ScoredContext(
context=context,
composite_score=context._score,
)
# Compute individual scores in parallel
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
recency_task = self._recency_scorer.score(context, query, **kwargs)
priority_task = self._priority_scorer.score(context, query, **kwargs)
relevance_score, recency_score, priority_score = await asyncio.gather(
relevance_task, recency_task, priority_task
)
# Compute individual scores in parallel
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
recency_task = self._recency_scorer.score(context, query, **kwargs)
priority_task = self._priority_scorer.score(context, query, **kwargs)
# Compute weighted composite
total_weight = (
self._relevance_weight + self._recency_weight + self._priority_weight
)
relevance_score, recency_score, priority_score = await asyncio.gather(
relevance_task, recency_task, priority_task
)
if total_weight > 0:
composite = (
relevance_score * self._relevance_weight
+ recency_score * self._recency_weight
+ priority_score * self._priority_weight
) / total_weight
else:
composite = 0.0
# Compute weighted composite
total_weight = (
self._relevance_weight + self._recency_weight + self._priority_weight
)
# Cache the score on the context (now safe - inside lock)
context._score = composite
if total_weight > 0:
composite = (
relevance_score * self._relevance_weight
+ recency_score * self._recency_weight
+ priority_score * self._priority_weight
) / total_weight
else:
composite = 0.0
# Cache the score on the context
context._score = composite
return ScoredContext(
context=context,
composite_score=composite,
relevance_score=relevance_score,
recency_score=recency_score,
priority_score=priority_score,
)
return ScoredContext(
context=context,
composite_score=composite,
relevance_score=relevance_score,
recency_score=recency_score,
priority_score=priority_score,
)
async def score_batch(
self,