forked from cardosofelipe/fast-next-template
feat(context): implement context scoring and ranking (Phase 3)
Add comprehensive scoring system with three strategies: - RelevanceScorer: Semantic similarity with keyword fallback - RecencyScorer: Exponential decay with type-specific half-lives - PriorityScorer: Priority-based scoring with type bonuses Implement CompositeScorer combining all strategies with configurable weights (default: 50% relevance, 30% recency, 20% priority). Add ContextRanker for budget-aware context selection with: - Greedy selection algorithm respecting token budgets - CRITICAL priority contexts always included - Diversity reranking to prevent source dominance - Comprehensive selection statistics 68 tests covering all scoring and ranking functionality. Part of #61 - Context Management Engine 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,21 @@
|
||||
"""
|
||||
Context Scoring Module.
|
||||
|
||||
Provides relevance, recency, and composite scoring.
|
||||
Provides scoring strategies for context prioritization.
|
||||
"""
|
||||
|
||||
from .base import BaseScorer, ScorerProtocol
|
||||
from .composite import CompositeScorer, ScoredContext
|
||||
from .priority import PriorityScorer
|
||||
from .recency import RecencyScorer
|
||||
from .relevance import RelevanceScorer
|
||||
|
||||
__all__ = [
|
||||
"BaseScorer",
|
||||
"CompositeScorer",
|
||||
"PriorityScorer",
|
||||
"RecencyScorer",
|
||||
"RelevanceScorer",
|
||||
"ScoredContext",
|
||||
"ScorerProtocol",
|
||||
]
|
||||
|
||||
99
backend/app/services/context/scoring/base.py
Normal file
99
backend/app/services/context/scoring/base.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""
|
||||
Base Scorer Protocol and Types.
|
||||
|
||||
Defines the interface for context scoring implementations.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
||||
|
||||
from ..types import BaseContext
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.services.mcp.client_manager import MCPClientManager
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class ScorerProtocol(Protocol):
|
||||
"""Protocol for context scorers."""
|
||||
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> float:
|
||||
"""
|
||||
Score a context item.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to score against
|
||||
**kwargs: Additional scoring parameters
|
||||
|
||||
Returns:
|
||||
Score between 0.0 and 1.0
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
class BaseScorer(ABC):
|
||||
"""
|
||||
Abstract base class for context scorers.
|
||||
|
||||
Provides common functionality and interface for
|
||||
different scoring strategies.
|
||||
"""
|
||||
|
||||
def __init__(self, weight: float = 1.0) -> None:
|
||||
"""
|
||||
Initialize scorer.
|
||||
|
||||
Args:
|
||||
weight: Weight for this scorer in composite scoring
|
||||
"""
|
||||
self._weight = weight
|
||||
|
||||
@property
|
||||
def weight(self) -> float:
|
||||
"""Get scorer weight."""
|
||||
return self._weight
|
||||
|
||||
@weight.setter
|
||||
def weight(self, value: float) -> None:
|
||||
"""Set scorer weight."""
|
||||
if not 0.0 <= value <= 1.0:
|
||||
raise ValueError("Weight must be between 0.0 and 1.0")
|
||||
self._weight = value
|
||||
|
||||
@abstractmethod
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> float:
|
||||
"""
|
||||
Score a context item.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to score against
|
||||
**kwargs: Additional scoring parameters
|
||||
|
||||
Returns:
|
||||
Score between 0.0 and 1.0
|
||||
"""
|
||||
...
|
||||
|
||||
def normalize_score(self, score: float) -> float:
|
||||
"""
|
||||
Normalize score to [0.0, 1.0] range.
|
||||
|
||||
Args:
|
||||
score: Raw score
|
||||
|
||||
Returns:
|
||||
Normalized score
|
||||
"""
|
||||
return max(0.0, min(1.0, score))
|
||||
276
backend/app/services/context/scoring/composite.py
Normal file
276
backend/app/services/context/scoring/composite.py
Normal file
@@ -0,0 +1,276 @@
|
||||
"""
|
||||
Composite Scorer for Context Management.
|
||||
|
||||
Combines multiple scoring strategies with configurable weights.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from .base import BaseScorer
|
||||
from .priority import PriorityScorer
|
||||
from .recency import RecencyScorer
|
||||
from .relevance import RelevanceScorer
|
||||
from ..config import ContextSettings, get_context_settings
|
||||
from ..types import BaseContext
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.services.mcp.client_manager import MCPClientManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScoredContext:
|
||||
"""Context with computed scores."""
|
||||
|
||||
context: BaseContext
|
||||
composite_score: float
|
||||
relevance_score: float = 0.0
|
||||
recency_score: float = 0.0
|
||||
priority_score: float = 0.0
|
||||
|
||||
def __lt__(self, other: "ScoredContext") -> bool:
|
||||
"""Enable sorting by composite score."""
|
||||
return self.composite_score < other.composite_score
|
||||
|
||||
def __gt__(self, other: "ScoredContext") -> bool:
|
||||
"""Enable sorting by composite score."""
|
||||
return self.composite_score > other.composite_score
|
||||
|
||||
|
||||
class CompositeScorer:
|
||||
"""
|
||||
Combines multiple scoring strategies.
|
||||
|
||||
Weights:
|
||||
- relevance: How well content matches the query
|
||||
- recency: How recent the content is
|
||||
- priority: Explicit priority assignments
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
mcp_manager: "MCPClientManager | None" = None,
|
||||
settings: ContextSettings | None = None,
|
||||
relevance_weight: float | None = None,
|
||||
recency_weight: float | None = None,
|
||||
priority_weight: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize composite scorer.
|
||||
|
||||
Args:
|
||||
mcp_manager: MCP manager for semantic scoring
|
||||
settings: Context settings (uses default if None)
|
||||
relevance_weight: Override relevance weight
|
||||
recency_weight: Override recency weight
|
||||
priority_weight: Override priority weight
|
||||
"""
|
||||
self._settings = settings or get_context_settings()
|
||||
weights = self._settings.get_scoring_weights()
|
||||
|
||||
self._relevance_weight = (
|
||||
relevance_weight if relevance_weight is not None else weights["relevance"]
|
||||
)
|
||||
self._recency_weight = (
|
||||
recency_weight if recency_weight is not None else weights["recency"]
|
||||
)
|
||||
self._priority_weight = (
|
||||
priority_weight if priority_weight is not None else weights["priority"]
|
||||
)
|
||||
|
||||
# Initialize scorers
|
||||
self._relevance_scorer = RelevanceScorer(
|
||||
mcp_manager=mcp_manager,
|
||||
weight=self._relevance_weight,
|
||||
)
|
||||
self._recency_scorer = RecencyScorer(weight=self._recency_weight)
|
||||
self._priority_scorer = PriorityScorer(weight=self._priority_weight)
|
||||
|
||||
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
|
||||
"""Set MCP manager for semantic scoring."""
|
||||
self._relevance_scorer.set_mcp_manager(mcp_manager)
|
||||
|
||||
@property
|
||||
def weights(self) -> dict[str, float]:
|
||||
"""Get current scoring weights."""
|
||||
return {
|
||||
"relevance": self._relevance_weight,
|
||||
"recency": self._recency_weight,
|
||||
"priority": self._priority_weight,
|
||||
}
|
||||
|
||||
def update_weights(
|
||||
self,
|
||||
relevance: float | None = None,
|
||||
recency: float | None = None,
|
||||
priority: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Update scoring weights.
|
||||
|
||||
Args:
|
||||
relevance: New relevance weight
|
||||
recency: New recency weight
|
||||
priority: New priority weight
|
||||
"""
|
||||
if relevance is not None:
|
||||
self._relevance_weight = max(0.0, min(1.0, relevance))
|
||||
self._relevance_scorer.weight = self._relevance_weight
|
||||
|
||||
if recency is not None:
|
||||
self._recency_weight = max(0.0, min(1.0, recency))
|
||||
self._recency_scorer.weight = self._recency_weight
|
||||
|
||||
if priority is not None:
|
||||
self._priority_weight = max(0.0, min(1.0, priority))
|
||||
self._priority_scorer.weight = self._priority_weight
|
||||
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> float:
|
||||
"""
|
||||
Compute composite score for a context.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to score against
|
||||
**kwargs: Additional scoring parameters
|
||||
|
||||
Returns:
|
||||
Composite score between 0.0 and 1.0
|
||||
"""
|
||||
scored = await self.score_with_details(context, query, **kwargs)
|
||||
return scored.composite_score
|
||||
|
||||
async def score_with_details(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> ScoredContext:
|
||||
"""
|
||||
Compute composite score with individual scores.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to score against
|
||||
**kwargs: Additional scoring parameters
|
||||
|
||||
Returns:
|
||||
ScoredContext with all scores
|
||||
"""
|
||||
# Check if context already has a score
|
||||
if context._score is not None:
|
||||
return ScoredContext(
|
||||
context=context,
|
||||
composite_score=context._score,
|
||||
)
|
||||
|
||||
# Compute individual scores in parallel
|
||||
relevance_task = self._relevance_scorer.score(context, query, **kwargs)
|
||||
recency_task = self._recency_scorer.score(context, query, **kwargs)
|
||||
priority_task = self._priority_scorer.score(context, query, **kwargs)
|
||||
|
||||
relevance_score, recency_score, priority_score = await asyncio.gather(
|
||||
relevance_task, recency_task, priority_task
|
||||
)
|
||||
|
||||
# Compute weighted composite
|
||||
total_weight = (
|
||||
self._relevance_weight + self._recency_weight + self._priority_weight
|
||||
)
|
||||
|
||||
if total_weight > 0:
|
||||
composite = (
|
||||
relevance_score * self._relevance_weight
|
||||
+ recency_score * self._recency_weight
|
||||
+ priority_score * self._priority_weight
|
||||
) / total_weight
|
||||
else:
|
||||
composite = 0.0
|
||||
|
||||
# Cache the score on the context
|
||||
context._score = composite
|
||||
|
||||
return ScoredContext(
|
||||
context=context,
|
||||
composite_score=composite,
|
||||
relevance_score=relevance_score,
|
||||
recency_score=recency_score,
|
||||
priority_score=priority_score,
|
||||
)
|
||||
|
||||
async def score_batch(
|
||||
self,
|
||||
contexts: list[BaseContext],
|
||||
query: str,
|
||||
parallel: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> list[ScoredContext]:
|
||||
"""
|
||||
Score multiple contexts.
|
||||
|
||||
Args:
|
||||
contexts: Contexts to score
|
||||
query: Query to score against
|
||||
parallel: Whether to score in parallel
|
||||
**kwargs: Additional scoring parameters
|
||||
|
||||
Returns:
|
||||
List of ScoredContext (same order as input)
|
||||
"""
|
||||
if parallel:
|
||||
tasks = [
|
||||
self.score_with_details(ctx, query, **kwargs) for ctx in contexts
|
||||
]
|
||||
return await asyncio.gather(*tasks)
|
||||
else:
|
||||
results = []
|
||||
for ctx in contexts:
|
||||
scored = await self.score_with_details(ctx, query, **kwargs)
|
||||
results.append(scored)
|
||||
return results
|
||||
|
||||
async def rank(
|
||||
self,
|
||||
contexts: list[BaseContext],
|
||||
query: str,
|
||||
limit: int | None = None,
|
||||
min_score: float = 0.0,
|
||||
**kwargs: Any,
|
||||
) -> list[ScoredContext]:
|
||||
"""
|
||||
Score and rank contexts.
|
||||
|
||||
Args:
|
||||
contexts: Contexts to rank
|
||||
query: Query to rank against
|
||||
limit: Maximum number of results
|
||||
min_score: Minimum score threshold
|
||||
**kwargs: Additional scoring parameters
|
||||
|
||||
Returns:
|
||||
Sorted list of ScoredContext (highest first)
|
||||
"""
|
||||
# Score all contexts
|
||||
scored = await self.score_batch(contexts, query, **kwargs)
|
||||
|
||||
# Filter by minimum score
|
||||
if min_score > 0:
|
||||
scored = [s for s in scored if s.composite_score >= min_score]
|
||||
|
||||
# Sort by score (highest first)
|
||||
scored.sort(reverse=True)
|
||||
|
||||
# Apply limit
|
||||
if limit is not None:
|
||||
scored = scored[:limit]
|
||||
|
||||
return scored
|
||||
135
backend/app/services/context/scoring/priority.py
Normal file
135
backend/app/services/context/scoring/priority.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Priority Scorer for Context Management.
|
||||
|
||||
Scores context based on assigned priority levels.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseScorer
|
||||
from ..types import BaseContext, ContextPriority, ContextType
|
||||
|
||||
|
||||
class PriorityScorer(BaseScorer):
|
||||
"""
|
||||
Scores context based on priority levels.
|
||||
|
||||
Converts priority enum values to normalized scores.
|
||||
Also applies type-based priority bonuses.
|
||||
"""
|
||||
|
||||
# Default priority bonuses by context type
|
||||
DEFAULT_TYPE_BONUSES: dict[ContextType, float] = {
|
||||
ContextType.SYSTEM: 0.2, # System prompts get a boost
|
||||
ContextType.TASK: 0.15, # Current task is important
|
||||
ContextType.TOOL: 0.1, # Recent tool results matter
|
||||
ContextType.KNOWLEDGE: 0.0, # Knowledge scored by relevance
|
||||
ContextType.CONVERSATION: 0.0, # Conversation scored by recency
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
weight: float = 1.0,
|
||||
type_bonuses: dict[ContextType, float] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize priority scorer.
|
||||
|
||||
Args:
|
||||
weight: Scorer weight for composite scoring
|
||||
type_bonuses: Optional context-type priority bonuses
|
||||
"""
|
||||
super().__init__(weight)
|
||||
self._type_bonuses = type_bonuses or self.DEFAULT_TYPE_BONUSES.copy()
|
||||
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> float:
|
||||
"""
|
||||
Score context based on priority.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query (not used for priority, kept for interface)
|
||||
**kwargs: Additional parameters
|
||||
|
||||
Returns:
|
||||
Priority score between 0.0 and 1.0
|
||||
"""
|
||||
# Get base priority score
|
||||
priority_value = context.priority
|
||||
base_score = self._priority_to_score(priority_value)
|
||||
|
||||
# Apply type bonus
|
||||
context_type = context.get_type()
|
||||
bonus = self._type_bonuses.get(context_type, 0.0)
|
||||
|
||||
return self.normalize_score(base_score + bonus)
|
||||
|
||||
def _priority_to_score(self, priority: int) -> float:
|
||||
"""
|
||||
Convert priority value to normalized score.
|
||||
|
||||
Priority values (from ContextPriority):
|
||||
- CRITICAL (100) -> 1.0
|
||||
- HIGH (80) -> 0.8
|
||||
- NORMAL (50) -> 0.5
|
||||
- LOW (20) -> 0.2
|
||||
- MINIMAL (0) -> 0.0
|
||||
|
||||
Args:
|
||||
priority: Priority value (0-100)
|
||||
|
||||
Returns:
|
||||
Normalized score (0.0-1.0)
|
||||
"""
|
||||
# Clamp to valid range
|
||||
clamped = max(0, min(100, priority))
|
||||
return clamped / 100.0
|
||||
|
||||
def get_type_bonus(self, context_type: ContextType) -> float:
|
||||
"""
|
||||
Get priority bonus for a context type.
|
||||
|
||||
Args:
|
||||
context_type: Context type
|
||||
|
||||
Returns:
|
||||
Bonus value
|
||||
"""
|
||||
return self._type_bonuses.get(context_type, 0.0)
|
||||
|
||||
def set_type_bonus(self, context_type: ContextType, bonus: float) -> None:
|
||||
"""
|
||||
Set priority bonus for a context type.
|
||||
|
||||
Args:
|
||||
context_type: Context type
|
||||
bonus: Bonus value (0.0-1.0)
|
||||
"""
|
||||
if not 0.0 <= bonus <= 1.0:
|
||||
raise ValueError("Bonus must be between 0.0 and 1.0")
|
||||
self._type_bonuses[context_type] = bonus
|
||||
|
||||
async def score_batch(
|
||||
self,
|
||||
contexts: list[BaseContext],
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> list[float]:
|
||||
"""
|
||||
Score multiple contexts.
|
||||
|
||||
Args:
|
||||
contexts: Contexts to score
|
||||
query: Query (not used)
|
||||
**kwargs: Additional parameters
|
||||
|
||||
Returns:
|
||||
List of scores (same order as input)
|
||||
"""
|
||||
# Priority scoring is fast, no async needed
|
||||
return [await self.score(ctx, query, **kwargs) for ctx in contexts]
|
||||
141
backend/app/services/context/scoring/recency.py
Normal file
141
backend/app/services/context/scoring/recency.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Recency Scorer for Context Management.
|
||||
|
||||
Scores context based on how recent it is.
|
||||
More recent content gets higher scores.
|
||||
"""
|
||||
|
||||
import math
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseScorer
|
||||
from ..types import BaseContext, ContextType
|
||||
|
||||
|
||||
class RecencyScorer(BaseScorer):
|
||||
"""
|
||||
Scores context based on recency.
|
||||
|
||||
Uses exponential decay to score content based on age.
|
||||
More recent content scores higher.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
weight: float = 1.0,
|
||||
half_life_hours: float = 24.0,
|
||||
type_half_lives: dict[ContextType, float] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize recency scorer.
|
||||
|
||||
Args:
|
||||
weight: Scorer weight for composite scoring
|
||||
half_life_hours: Default hours until score decays to 0.5
|
||||
type_half_lives: Optional context-type-specific half lives
|
||||
"""
|
||||
super().__init__(weight)
|
||||
self._half_life_hours = half_life_hours
|
||||
self._type_half_lives = type_half_lives or {}
|
||||
|
||||
# Set sensible defaults for context types
|
||||
if ContextType.CONVERSATION not in self._type_half_lives:
|
||||
self._type_half_lives[ContextType.CONVERSATION] = 1.0 # 1 hour
|
||||
if ContextType.TOOL not in self._type_half_lives:
|
||||
self._type_half_lives[ContextType.TOOL] = 0.5 # 30 minutes
|
||||
if ContextType.KNOWLEDGE not in self._type_half_lives:
|
||||
self._type_half_lives[ContextType.KNOWLEDGE] = 168.0 # 1 week
|
||||
if ContextType.SYSTEM not in self._type_half_lives:
|
||||
self._type_half_lives[ContextType.SYSTEM] = 720.0 # 30 days
|
||||
if ContextType.TASK not in self._type_half_lives:
|
||||
self._type_half_lives[ContextType.TASK] = 24.0 # 1 day
|
||||
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> float:
|
||||
"""
|
||||
Score context based on recency.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query (not used for recency, kept for interface)
|
||||
**kwargs: Additional parameters
|
||||
- reference_time: Time to measure recency from (default: now)
|
||||
|
||||
Returns:
|
||||
Recency score between 0.0 and 1.0
|
||||
"""
|
||||
reference_time = kwargs.get("reference_time")
|
||||
if reference_time is None:
|
||||
reference_time = datetime.now(UTC)
|
||||
elif reference_time.tzinfo is None:
|
||||
reference_time = reference_time.replace(tzinfo=UTC)
|
||||
|
||||
# Ensure context timestamp is timezone-aware
|
||||
context_time = context.timestamp
|
||||
if context_time.tzinfo is None:
|
||||
context_time = context_time.replace(tzinfo=UTC)
|
||||
|
||||
# Calculate age in hours
|
||||
age = reference_time - context_time
|
||||
age_hours = max(0, age.total_seconds() / 3600)
|
||||
|
||||
# Get half-life for this context type
|
||||
context_type = context.get_type()
|
||||
half_life = self._type_half_lives.get(context_type, self._half_life_hours)
|
||||
|
||||
# Exponential decay
|
||||
decay_factor = math.exp(-math.log(2) * age_hours / half_life)
|
||||
|
||||
return self.normalize_score(decay_factor)
|
||||
|
||||
def get_half_life(self, context_type: ContextType) -> float:
|
||||
"""
|
||||
Get half-life for a context type.
|
||||
|
||||
Args:
|
||||
context_type: Context type to get half-life for
|
||||
|
||||
Returns:
|
||||
Half-life in hours
|
||||
"""
|
||||
return self._type_half_lives.get(context_type, self._half_life_hours)
|
||||
|
||||
def set_half_life(self, context_type: ContextType, hours: float) -> None:
|
||||
"""
|
||||
Set half-life for a context type.
|
||||
|
||||
Args:
|
||||
context_type: Context type to set half-life for
|
||||
hours: Half-life in hours
|
||||
"""
|
||||
if hours <= 0:
|
||||
raise ValueError("Half-life must be positive")
|
||||
self._type_half_lives[context_type] = hours
|
||||
|
||||
async def score_batch(
|
||||
self,
|
||||
contexts: list[BaseContext],
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> list[float]:
|
||||
"""
|
||||
Score multiple contexts.
|
||||
|
||||
Args:
|
||||
contexts: Contexts to score
|
||||
query: Query (not used)
|
||||
**kwargs: Additional parameters
|
||||
|
||||
Returns:
|
||||
List of scores (same order as input)
|
||||
"""
|
||||
scores = []
|
||||
for context in contexts:
|
||||
score = await self.score(context, query, **kwargs)
|
||||
scores.append(score)
|
||||
return scores
|
||||
188
backend/app/services/context/scoring/relevance.py
Normal file
188
backend/app/services/context/scoring/relevance.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""
|
||||
Relevance Scorer for Context Management.
|
||||
|
||||
Scores context based on semantic similarity to the query.
|
||||
Uses Knowledge Base embeddings when available.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from .base import BaseScorer
|
||||
from ..types import BaseContext, ContextType, KnowledgeContext
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.services.mcp.client_manager import MCPClientManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RelevanceScorer(BaseScorer):
|
||||
"""
|
||||
Scores context based on relevance to query.
|
||||
|
||||
Uses multiple strategies:
|
||||
1. Pre-computed scores (from RAG results)
|
||||
2. MCP-based semantic similarity (via Knowledge Base)
|
||||
3. Keyword matching fallback
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
mcp_manager: "MCPClientManager | None" = None,
|
||||
weight: float = 1.0,
|
||||
keyword_fallback_weight: float = 0.5,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize relevance scorer.
|
||||
|
||||
Args:
|
||||
mcp_manager: MCP manager for Knowledge Base calls
|
||||
weight: Scorer weight for composite scoring
|
||||
keyword_fallback_weight: Max score for keyword-based fallback
|
||||
"""
|
||||
super().__init__(weight)
|
||||
self._mcp = mcp_manager
|
||||
self._keyword_fallback_weight = keyword_fallback_weight
|
||||
|
||||
def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
|
||||
"""Set MCP manager for semantic scoring."""
|
||||
self._mcp = mcp_manager
|
||||
|
||||
async def score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> float:
|
||||
"""
|
||||
Score context relevance to query.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to score against
|
||||
**kwargs: Additional parameters
|
||||
|
||||
Returns:
|
||||
Relevance score between 0.0 and 1.0
|
||||
"""
|
||||
# 1. Check for pre-computed relevance score
|
||||
if isinstance(context, KnowledgeContext) and context.relevance_score is not None:
|
||||
return self.normalize_score(context.relevance_score)
|
||||
|
||||
# 2. Check metadata for score
|
||||
if "relevance_score" in context.metadata:
|
||||
return self.normalize_score(context.metadata["relevance_score"])
|
||||
|
||||
if "score" in context.metadata:
|
||||
return self.normalize_score(context.metadata["score"])
|
||||
|
||||
# 3. Try MCP-based semantic similarity
|
||||
if self._mcp is not None:
|
||||
try:
|
||||
score = await self._compute_semantic_similarity(context, query)
|
||||
if score is not None:
|
||||
return score
|
||||
except Exception as e:
|
||||
logger.debug(f"Semantic scoring failed, using fallback: {e}")
|
||||
|
||||
# 4. Fall back to keyword matching
|
||||
return self._compute_keyword_score(context, query)
|
||||
|
||||
async def _compute_semantic_similarity(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
) -> float | None:
|
||||
"""
|
||||
Compute semantic similarity using Knowledge Base embeddings.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to compare
|
||||
|
||||
Returns:
|
||||
Similarity score or None if unavailable
|
||||
"""
|
||||
try:
|
||||
# Use Knowledge Base's search capability to compute similarity
|
||||
result = await self._mcp.call_tool(
|
||||
server="knowledge-base",
|
||||
tool="compute_similarity",
|
||||
args={
|
||||
"text1": query,
|
||||
"text2": context.content[:2000], # Limit content length
|
||||
},
|
||||
)
|
||||
|
||||
if result.success and result.data:
|
||||
similarity = result.data.get("similarity")
|
||||
if similarity is not None:
|
||||
return self.normalize_score(float(similarity))
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Semantic similarity computation failed: {e}")
|
||||
|
||||
return None
|
||||
|
||||
def _compute_keyword_score(
|
||||
self,
|
||||
context: BaseContext,
|
||||
query: str,
|
||||
) -> float:
|
||||
"""
|
||||
Compute relevance score based on keyword matching.
|
||||
|
||||
Simple but fast fallback when semantic search is unavailable.
|
||||
|
||||
Args:
|
||||
context: Context to score
|
||||
query: Query to match
|
||||
|
||||
Returns:
|
||||
Keyword-based relevance score
|
||||
"""
|
||||
if not query or not context.content:
|
||||
return 0.0
|
||||
|
||||
# Extract keywords from query
|
||||
query_lower = query.lower()
|
||||
content_lower = context.content.lower()
|
||||
|
||||
# Simple word tokenization
|
||||
query_words = set(re.findall(r"\b\w{3,}\b", query_lower))
|
||||
content_words = set(re.findall(r"\b\w{3,}\b", content_lower))
|
||||
|
||||
if not query_words:
|
||||
return 0.0
|
||||
|
||||
# Calculate overlap
|
||||
common_words = query_words & content_words
|
||||
overlap_ratio = len(common_words) / len(query_words)
|
||||
|
||||
# Apply fallback weight ceiling
|
||||
return self.normalize_score(overlap_ratio * self._keyword_fallback_weight)
|
||||
|
||||
async def score_batch(
|
||||
self,
|
||||
contexts: list[BaseContext],
|
||||
query: str,
|
||||
**kwargs: Any,
|
||||
) -> list[float]:
|
||||
"""
|
||||
Score multiple contexts.
|
||||
|
||||
Args:
|
||||
contexts: Contexts to score
|
||||
query: Query to score against
|
||||
**kwargs: Additional parameters
|
||||
|
||||
Returns:
|
||||
List of scores (same order as input)
|
||||
"""
|
||||
scores = []
|
||||
for context in contexts:
|
||||
score = await self.score(context, query, **kwargs)
|
||||
scores.append(score)
|
||||
return scores
|
||||
Reference in New Issue
Block a user