syndarix/backend/app/services/context/scoring/relevance.py
Commit 0d2005ddcb by Felipe Cardoso

feat(context): implement context scoring and ranking (Phase 3)
Add comprehensive scoring system with three strategies:
- RelevanceScorer: Semantic similarity with keyword fallback
- RecencyScorer: Exponential decay with type-specific half-lives
- PriorityScorer: Priority-based scoring with type bonuses

Implement CompositeScorer combining all strategies with configurable
weights (default: 50% relevance, 30% recency, 20% priority).
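The weighted combination itself is simple. As a rough illustration only (not the actual CompositeScorer code; the function names and the recency half-life below are assumptions made for the sketch):

```python
import time


def recency_score(created_at: float, half_life_seconds: float = 3600.0) -> float:
    """Exponential decay: the score halves every half_life_seconds."""
    age = max(0.0, time.time() - created_at)
    return 0.5 ** (age / half_life_seconds)


def composite_score(relevance: float, recency: float, priority: float) -> float:
    """Default weights described above: 50% relevance, 30% recency, 20% priority."""
    return 0.5 * relevance + 0.3 * recency + 0.2 * priority
```

For example, relevance 0.9, recency 0.25 (two half-lives old) and priority 0.5 combine to 0.5 x 0.9 + 0.3 x 0.25 + 0.2 x 0.5 = 0.625.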

Add ContextRanker for budget-aware context selection with:
- Greedy selection algorithm respecting token budgets (sketched below)
- CRITICAL priority contexts always included
- Diversity reranking to prevent source dominance
- Comprehensive selection statistics
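A minimal sketch of what such budget-aware greedy selection can look like; the `ScoredContext` shape and its field names are assumptions for the sketch, not ContextRanker's actual types:

```python
from dataclasses import dataclass


@dataclass
class ScoredContext:
    content: str
    score: float
    tokens: int
    critical: bool = False  # stands in for CRITICAL priority


def select_within_budget(items: list[ScoredContext], budget: int) -> list[ScoredContext]:
    """Include CRITICAL items first, then greedily add the best-scoring rest that fits."""
    selected = [it for it in items if it.critical]
    remaining = budget - sum(it.tokens for it in selected)
    for it in sorted((i for i in items if not i.critical), key=lambda i: i.score, reverse=True):
        if it.tokens <= remaining:
            selected.append(it)
            remaining -= it.tokens
    return selected
```

Diversity reranking and the selection statistics are omitted from this sketch.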

68 tests covering all scoring and ranking functionality.

Part of #61 - Context Management Engine

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 02:24:06 +01:00

189 lines · 5.4 KiB · Python

"""
Relevance Scorer for Context Management.
Scores context based on semantic similarity to the query.
Uses Knowledge Base embeddings when available.
"""
import logging
import re
from typing import TYPE_CHECKING, Any
from .base import BaseScorer
from ..types import BaseContext, ContextType, KnowledgeContext
if TYPE_CHECKING:
from app.services.mcp.client_manager import MCPClientManager
logger = logging.getLogger(__name__)
class RelevanceScorer(BaseScorer):
"""
Scores context based on relevance to query.
Uses multiple strategies:
1. Pre-computed scores (from RAG results)
2. MCP-based semantic similarity (via Knowledge Base)
3. Keyword matching fallback
"""
def __init__(
self,
mcp_manager: "MCPClientManager | None" = None,
weight: float = 1.0,
keyword_fallback_weight: float = 0.5,
) -> None:
"""
Initialize relevance scorer.
Args:
mcp_manager: MCP manager for Knowledge Base calls
weight: Scorer weight for composite scoring
keyword_fallback_weight: Max score for keyword-based fallback
"""
super().__init__(weight)
self._mcp = mcp_manager
self._keyword_fallback_weight = keyword_fallback_weight
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
"""Set MCP manager for semantic scoring."""
self._mcp = mcp_manager
async def score(
self,
context: BaseContext,
query: str,
**kwargs: Any,
) -> float:
"""
Score context relevance to query.
Args:
context: Context to score
query: Query to score against
**kwargs: Additional parameters
Returns:
Relevance score between 0.0 and 1.0
"""
# 1. Check for pre-computed relevance score
if isinstance(context, KnowledgeContext) and context.relevance_score is not None:
return self.normalize_score(context.relevance_score)
# 2. Check metadata for score
if "relevance_score" in context.metadata:
return self.normalize_score(context.metadata["relevance_score"])
if "score" in context.metadata:
return self.normalize_score(context.metadata["score"])
# 3. Try MCP-based semantic similarity
if self._mcp is not None:
try:
score = await self._compute_semantic_similarity(context, query)
if score is not None:
return score
except Exception as e:
logger.debug(f"Semantic scoring failed, using fallback: {e}")
# 4. Fall back to keyword matching
return self._compute_keyword_score(context, query)
async def _compute_semantic_similarity(
self,
context: BaseContext,
query: str,
) -> float | None:
"""
Compute semantic similarity using Knowledge Base embeddings.
Args:
context: Context to score
query: Query to compare
Returns:
Similarity score or None if unavailable
"""
try:
# Use Knowledge Base's search capability to compute similarity
result = await self._mcp.call_tool(
server="knowledge-base",
tool="compute_similarity",
args={
"text1": query,
"text2": context.content[:2000], # Limit content length
},
)
if result.success and result.data:
similarity = result.data.get("similarity")
if similarity is not None:
return self.normalize_score(float(similarity))
except Exception as e:
logger.debug(f"Semantic similarity computation failed: {e}")
return None
def _compute_keyword_score(
self,
context: BaseContext,
query: str,
) -> float:
"""
Compute relevance score based on keyword matching.
Simple but fast fallback when semantic search is unavailable.
Args:
context: Context to score
query: Query to match
Returns:
Keyword-based relevance score
"""
if not query or not context.content:
return 0.0
# Extract keywords from query
query_lower = query.lower()
content_lower = context.content.lower()
# Simple word tokenization
query_words = set(re.findall(r"\b\w{3,}\b", query_lower))
content_words = set(re.findall(r"\b\w{3,}\b", content_lower))
if not query_words:
return 0.0
# Calculate overlap
common_words = query_words & content_words
overlap_ratio = len(common_words) / len(query_words)
# Apply fallback weight ceiling
return self.normalize_score(overlap_ratio * self._keyword_fallback_weight)
async def score_batch(
self,
contexts: list[BaseContext],
query: str,
**kwargs: Any,
) -> list[float]:
"""
Score multiple contexts.
Args:
contexts: Contexts to score
query: Query to score against
**kwargs: Additional parameters
Returns:
List of scores (same order as input)
"""
scores = []
for context in contexts:
score = await self.score(context, query, **kwargs)
scores.append(score)
return scores
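
For reference, a minimal usage sketch of RelevanceScorer as defined above. Without an MCP manager it exercises the keyword fallback path; how the BaseContext instances are constructed depends on `..types` and is assumed here, not shown in this file, and the import path is inferred from the repository layout:

```python
import asyncio

from app.services.context.scoring.relevance import RelevanceScorer


async def demo(contexts, query: str) -> None:
    # No MCP manager is passed, so scoring falls back to keyword overlap.
    scorer = RelevanceScorer()
    scores = await scorer.score_batch(contexts, query)
    for ctx, s in zip(contexts, scores):
        print(f"{s:.2f}  {ctx.content[:60]!r}")


# asyncio.run(demo(my_contexts, "how is context relevance scored?"))
# `my_contexts` is a placeholder for a list of BaseContext instances.
```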