feat(context): implement context scoring and ranking (Phase 3)
Add comprehensive scoring system with three strategies:
- RelevanceScorer: Semantic similarity with keyword fallback
- RecencyScorer: Exponential decay with type-specific half-lives
- PriorityScorer: Priority-based scoring with type bonuses

Implement CompositeScorer combining all strategies with configurable
weights (default: 50% relevance, 30% recency, 20% priority).

Add ContextRanker for budget-aware context selection with:
- Greedy selection algorithm respecting token budgets
- CRITICAL priority contexts always included
- Diversity reranking to prevent source dominance
- Comprehensive selection statistics

68 tests covering all scoring and ranking functionality.

Part of #61 - Context Management Engine

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
188
backend/app/services/context/scoring/relevance.py
Normal file
188
backend/app/services/context/scoring/relevance.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Relevance Scorer for Context Management.
|
||||
|
||||
Scores context based on semantic similarity to the query.
|
||||
Uses Knowledge Base embeddings when available.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from .base import BaseScorer
|
||||
from ..types import BaseContext, ContextType, KnowledgeContext
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.services.mcp.client_manager import MCPClientManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RelevanceScorer(BaseScorer):
    """
    Scores context based on relevance to query.

    Strategies are tried in order; the first one that yields a score wins:
    1. Pre-computed scores (from RAG results)
    2. Scores stored in the context metadata
    3. MCP-based semantic similarity (via Knowledge Base)
    4. Keyword matching fallback
    """

    # Tokenizer for the keyword fallback: runs of three or more word
    # characters. Compiled once instead of being re-specified on every call.
    _WORD_PATTERN = re.compile(r"\b\w{3,}\b")

    # Upper bound on the number of content characters sent to the
    # Knowledge Base similarity tool.
    _MAX_SIMILARITY_CONTENT_CHARS = 2000

    # Metadata keys that may carry a pre-computed score, in priority order.
    _METADATA_SCORE_KEYS = ("relevance_score", "score")

    def __init__(
        self,
        mcp_manager: "MCPClientManager | None" = None,
        weight: float = 1.0,
        keyword_fallback_weight: float = 0.5,
    ) -> None:
        """
        Initialize relevance scorer.

        Args:
            mcp_manager: MCP manager for Knowledge Base calls
            weight: Scorer weight for composite scoring
            keyword_fallback_weight: Max score for keyword-based fallback
        """
        super().__init__(weight)
        self._mcp = mcp_manager
        self._keyword_fallback_weight = keyword_fallback_weight

    def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
        """Set MCP manager for semantic scoring."""
        self._mcp = mcp_manager

    async def score(
        self,
        context: BaseContext,
        query: str,
        **kwargs: Any,
    ) -> float:
        """
        Score context relevance to query.

        Args:
            context: Context to score
            query: Query to score against
            **kwargs: Additional parameters (accepted for interface
                compatibility; not used by this scorer)

        Returns:
            Relevance score as returned by ``normalize_score`` (or by the
            semantic / keyword helpers, which normalize their own result)
        """
        # 1. Check for pre-computed relevance score
        if (
            isinstance(context, KnowledgeContext)
            and context.relevance_score is not None
        ):
            return self.normalize_score(context.relevance_score)

        # 2. Check metadata for a score. A key that is present but holds
        #    None or a non-numeric value is skipped, so that bad metadata
        #    does not prevent the later strategies from running.
        for key in self._METADATA_SCORE_KEYS:
            if key not in context.metadata:
                continue
            try:
                metadata_score = float(context.metadata[key])
            except (TypeError, ValueError):
                logger.debug(
                    "Ignoring non-numeric metadata score %r=%r",
                    key,
                    context.metadata[key],
                )
                continue
            return self.normalize_score(metadata_score)

        # 3. Try MCP-based semantic similarity (best effort)
        if self._mcp is not None:
            try:
                semantic_score = await self._compute_semantic_similarity(
                    context, query
                )
            except Exception as e:
                logger.debug("Semantic scoring failed, using fallback: %s", e)
            else:
                if semantic_score is not None:
                    return semantic_score

        # 4. Fall back to keyword matching
        return self._compute_keyword_score(context, query)

    async def _compute_semantic_similarity(
        self,
        context: BaseContext,
        query: str,
    ) -> float | None:
        """
        Compute semantic similarity using Knowledge Base embeddings.

        Any failure of the remote call is logged at debug level and reported
        as ``None`` so the caller can fall back to another strategy.

        Args:
            context: Context to score
            query: Query to compare

        Returns:
            Similarity score or None if unavailable
        """
        # Without an MCP manager there is nothing to call.
        if self._mcp is None:
            return None

        try:
            # Use Knowledge Base's search capability to compute similarity
            result = await self._mcp.call_tool(
                server="knowledge-base",
                tool="compute_similarity",
                args={
                    "text1": query,
                    # Limit content length
                    "text2": context.content[: self._MAX_SIMILARITY_CONTENT_CHARS],
                },
            )

            if result.success and result.data:
                similarity = result.data.get("similarity")
                if similarity is not None:
                    return self.normalize_score(float(similarity))

        except Exception as e:
            logger.debug("Semantic similarity computation failed: %s", e)

        return None

    def _compute_keyword_score(
        self,
        context: BaseContext,
        query: str,
    ) -> float:
        """
        Compute relevance score based on keyword matching.

        Simple but fast fallback when semantic search is unavailable. The
        score is the fraction of distinct query words (three or more
        characters, case-insensitive) that also occur in the content,
        scaled by the keyword fallback weight.

        Args:
            context: Context to score
            query: Query to match

        Returns:
            Keyword-based relevance score
        """
        if not query or not context.content:
            return 0.0

        # Tokenize the query first: if it has no usable words there is no
        # point in tokenizing the (potentially large) content.
        query_words = set(self._WORD_PATTERN.findall(query.lower()))
        if not query_words:
            return 0.0

        content_words = set(self._WORD_PATTERN.findall(context.content.lower()))

        # Calculate overlap
        overlap_ratio = len(query_words & content_words) / len(query_words)

        # Apply fallback weight ceiling
        return self.normalize_score(overlap_ratio * self._keyword_fallback_weight)

    async def score_batch(
        self,
        contexts: list[BaseContext],
        query: str,
        **kwargs: Any,
    ) -> list[float]:
        """
        Score multiple contexts.

        Contexts are scored one after another, in input order.

        Args:
            contexts: Contexts to score
            query: Query to score against
            **kwargs: Additional parameters forwarded to ``score``

        Returns:
            List of scores (same order as input)
        """
        return [await self.score(context, query, **kwargs) for context in contexts]
|
||||
Reference in New Issue
Block a user