feat(context): enhance performance, caching, and settings management
- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
This commit is contained in:
@@ -8,6 +8,7 @@ import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from weakref import WeakValueDictionary
|
||||
|
||||
from ..config import ContextSettings, get_context_settings
|
||||
from ..types import BaseContext
|
||||
@@ -89,6 +90,11 @@ class CompositeScorer:
|
||||
self._recency_scorer = RecencyScorer(weight=self._recency_weight)
|
||||
self._priority_scorer = PriorityScorer(weight=self._priority_weight)
|
||||
|
||||
# Per-context locks to prevent race conditions during parallel scoring
|
||||
# Uses WeakValueDictionary so locks are garbage collected when not in use
|
||||
self._context_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
|
||||
self._locks_lock = asyncio.Lock() # Lock to protect _context_locks access
|
||||
|
||||
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
|
||||
"""Set MCP manager for semantic scoring."""
|
||||
self._relevance_scorer.set_mcp_manager(mcp_manager)
|
||||
@@ -128,6 +134,38 @@ class CompositeScorer:
|
||||
self._priority_weight = max(0.0, min(1.0, priority))
|
||||
self._priority_scorer.weight = self._priority_weight
|
||||
|
||||
async def _get_context_lock(self, context_id: str) -> asyncio.Lock:
    """
    Get or create the lock for a specific context.

    Locks are looked up in ``self._context_locks``; when no live entry
    exists for ``context_id`` a new ``asyncio.Lock`` is created and stored
    while ``self._locks_lock`` is held, so concurrent coroutines asking for
    the same context receive the same lock object.

    Note: ``asyncio.Lock`` coordinates coroutines, it is not thread-safe.
    This helper therefore guards against concurrent *tasks*, not against
    access from multiple OS threads.

    Args:
        context_id: The context ID to get a lock for

    Returns:
        asyncio.Lock for the context
    """
    # Fast path: a single .get() both tests for and fetches the entry.
    # A separate ``in`` check adds nothing, because a weakly-held value
    # may vanish between the membership test and the fetch anyway.
    lock = self._context_locks.get(context_id)
    if lock is not None:
        return lock

    # Slow path: create the lock while holding the registry lock.
    async with self._locks_lock:
        # Re-check: another coroutine may have created the lock while
        # this one was waiting to acquire the registry lock.
        lock = self._context_locks.get(context_id)
        if lock is None:
            lock = asyncio.Lock()
            self._context_locks[context_id] = lock
        # The caller's reference is what keeps the lock alive when the
        # registry only holds values weakly.
        return lock
|
||||
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
@@ -157,6 +195,9 @@ class CompositeScorer:
|
||||
"""
|
||||
Compute composite score with individual scores.
|
||||
|
||||
Uses per-context locking to prevent race conditions when the same
|
||||
context is scored concurrently in parallel scoring operations.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to score against
|
||||
@@ -165,46 +206,50 @@ class CompositeScorer:
|
||||
Returns:
|
||||
ScoredContext with all scores
|
||||
"""
|
||||
# Check if context already has a score
|
||||
if context._score is not None:
|
||||
return ScoredContext(
|
||||
context=context,
|
||||
composite_score=context._score,
|
||||
# Get lock for this specific context to prevent race conditions
|
||||
context_lock = await self._get_context_lock(context.id)
|
||||
|
||||
async with context_lock:
|
||||
# Check if context already has a score (inside lock to prevent races)
|
||||
if context._score is not None:
|
||||
return ScoredContext(
|
||||
context=context,
|
||||
composite_score=context._score,
|
||||
)
|
||||
|
||||
# Compute individual scores in parallel
|
||||
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
|
||||
recency_task = self._recency_scorer.score(context, query, **kwargs)
|
||||
priority_task = self._priority_scorer.score(context, query, **kwargs)
|
||||
|
||||
relevance_score, recency_score, priority_score = await asyncio.gather(
|
||||
relevance_task, recency_task, priority_task
|
||||
)
|
||||
|
||||
# Compute individual scores in parallel
|
||||
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
|
||||
recency_task = self._recency_scorer.score(context, query, **kwargs)
|
||||
priority_task = self._priority_scorer.score(context, query, **kwargs)
|
||||
# Compute weighted composite
|
||||
total_weight = (
|
||||
self._relevance_weight + self._recency_weight + self._priority_weight
|
||||
)
|
||||
|
||||
relevance_score, recency_score, priority_score = await asyncio.gather(
|
||||
relevance_task, recency_task, priority_task
|
||||
)
|
||||
if total_weight > 0:
|
||||
composite = (
|
||||
relevance_score * self._relevance_weight
|
||||
+ recency_score * self._recency_weight
|
||||
+ priority_score * self._priority_weight
|
||||
) / total_weight
|
||||
else:
|
||||
composite = 0.0
|
||||
|
||||
# Compute weighted composite
|
||||
total_weight = (
|
||||
self._relevance_weight + self._recency_weight + self._priority_weight
|
||||
)
|
||||
# Cache the score on the context (now safe - inside lock)
|
||||
context._score = composite
|
||||
|
||||
if total_weight > 0:
|
||||
composite = (
|
||||
relevance_score * self._relevance_weight
|
||||
+ recency_score * self._recency_weight
|
||||
+ priority_score * self._priority_weight
|
||||
) / total_weight
|
||||
else:
|
||||
composite = 0.0
|
||||
|
||||
# Cache the score on the context
|
||||
context._score = composite
|
||||
|
||||
return ScoredContext(
|
||||
context=context,
|
||||
composite_score=composite,
|
||||
relevance_score=relevance_score,
|
||||
recency_score=recency_score,
|
||||
priority_score=priority_score,
|
||||
)
|
||||
return ScoredContext(
|
||||
context=context,
|
||||
composite_score=composite,
|
||||
relevance_score=relevance_score,
|
||||
recency_score=recency_score,
|
||||
priority_score=priority_score,
|
||||
)
|
||||
|
||||
async def score_batch(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user