# pragma-stack/backend/app/services/context/scoring/relevance.py

"""
Relevance Scorer for Context Management.

Scores context based on semantic similarity to the query.
Uses Knowledge Base embeddings when available.
"""

import logging
import math
import re
from typing import TYPE_CHECKING, Any

from ..types import BaseContext, KnowledgeContext
from .base import BaseScorer

if TYPE_CHECKING:
    from app.services.mcp.client_manager import MCPClientManager

logger = logging.getLogger(__name__)


class RelevanceScorer(BaseScorer):
    """
    Scores context based on relevance to query.

    Strategies are tried in order; the first one that yields a score wins:
    1. Pre-computed score on a ``KnowledgeContext`` (from RAG results)
    2. Pre-computed score in ``context.metadata`` (``relevance_score``,
       then ``score``)
    3. MCP-based semantic similarity (via Knowledge Base)
    4. Keyword matching fallback
    """

    # Tokens of three or more word characters; shorter words are ignored by
    # the keyword fallback. Compiled once instead of on every call.
    _WORD_PATTERN = re.compile(r"\b\w{3,}\b")

    # Metadata keys checked, in priority order, for a pre-computed score.
    _METADATA_SCORE_KEYS = ("relevance_score", "score")

    # Content is truncated to this many characters before it is sent to the
    # Knowledge Base similarity tool.
    _MAX_SIMILARITY_CHARS = 2000

    def __init__(
        self,
        mcp_manager: "MCPClientManager | None" = None,
        weight: float = 1.0,
        keyword_fallback_weight: float = 0.5,
    ) -> None:
        """
        Initialize relevance scorer.

        Args:
            mcp_manager: MCP manager for Knowledge Base calls. When None,
                the semantic-similarity strategy is skipped.
            weight: Scorer weight for composite scoring (passed to the
                base class)
            keyword_fallback_weight: Multiplier applied to the keyword
                overlap ratio, i.e. the score of a full keyword match
                before normalization
        """
        super().__init__(weight)
        self._mcp = mcp_manager
        self._keyword_fallback_weight = keyword_fallback_weight

    def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
        """Set MCP manager for semantic scoring."""
        self._mcp = mcp_manager

    @staticmethod
    def _coerce_score(value: Any) -> float | None:
        """
        Convert a raw score value to a usable float.

        Args:
            value: Raw value taken from metadata or a tool response

        Returns:
            The value as a float, or None when it is missing, cannot be
            converted, or is NaN
        """
        if value is None:
            return None
        try:
            score = float(value)
        except (TypeError, ValueError):
            return None
        # NaN carries no ordering information, so treat it as "no score"
        # and let the caller move on to the next strategy.
        return None if math.isnan(score) else score

    async def score(
        self,
        context: BaseContext,
        query: str,
        **kwargs: Any,
    ) -> float:
        """
        Score context relevance to query.

        Args:
            context: Context to score
            query: Query to score against
            **kwargs: Additional parameters (accepted for interface
                compatibility; not used by this scorer)

        Returns:
            Relevance score as produced by ``normalize_score``
        """
        # 1. Pre-computed relevance score carried by the context itself
        if (
            isinstance(context, KnowledgeContext)
            and context.relevance_score is not None
        ):
            return self.normalize_score(context.relevance_score)

        # 2. Pre-computed score in metadata. A key that is present but holds
        #    None or a non-numeric value is skipped rather than passed on.
        metadata = context.metadata or {}
        for key in self._METADATA_SCORE_KEYS:
            precomputed = self._coerce_score(metadata.get(key))
            if precomputed is not None:
                return self.normalize_score(precomputed)

        # 3. MCP-based semantic similarity. This is best-effort: any failure
        #    is logged at debug level and the keyword fallback is used.
        if self._mcp is not None:
            try:
                semantic = await self._compute_semantic_similarity(context, query)
            except Exception as e:
                logger.debug("Semantic scoring failed, using fallback: %s", e)
            else:
                if semantic is not None:
                    return semantic

        # 4. Fall back to keyword matching
        return self._compute_keyword_score(context, query)

    async def _compute_semantic_similarity(
        self,
        context: BaseContext,
        query: str,
    ) -> float | None:
        """
        Compute semantic similarity using the Knowledge Base MCP server.

        Calls the ``compute_similarity`` tool on the ``knowledge-base``
        server with the query and the (truncated) context content.

        Args:
            context: Context to score
            query: Query to compare

        Returns:
            Normalized similarity score, or None if no MCP manager is set,
            either text is empty, the call fails, or the response carries no
            usable ``similarity`` value
        """
        mcp = self._mcp
        # Nothing to compare (or nobody to ask): skip the remote call.
        if mcp is None or not query or not context.content:
            return None

        try:
            result = await mcp.call_tool(
                server="knowledge-base",
                tool="compute_similarity",
                args={
                    "text1": query,
                    # Limit content length sent to the tool
                    "text2": context.content[: self._MAX_SIMILARITY_CHARS],
                },
            )

            if result.success and result.data:
                similarity = self._coerce_score(result.data.get("similarity"))
                if similarity is not None:
                    return self.normalize_score(similarity)

        except Exception as e:
            # Deliberately broad: the failure modes of the MCP client are
            # not known here and scoring must degrade, not crash.
            logger.debug("Semantic similarity computation failed: %s", e)

        return None

    def _compute_keyword_score(
        self,
        context: BaseContext,
        query: str,
    ) -> float:
        """
        Compute relevance score based on keyword matching.

        Simple but fast fallback when semantic search is unavailable. The
        score is the fraction of distinct query words (three or more word
        characters, case-insensitive) that also occur in the content,
        multiplied by the keyword fallback weight.

        Args:
            context: Context to score
            query: Query to match

        Returns:
            Keyword-based relevance score; 0.0 when the query or content is
            empty or the query contains no qualifying words
        """
        if not query or not context.content:
            return 0.0

        query_words = set(self._WORD_PATTERN.findall(query.lower()))
        if not query_words:
            return 0.0

        content_words = set(self._WORD_PATTERN.findall(context.content.lower()))

        # Share of query words found in the content
        overlap_ratio = len(query_words & content_words) / len(query_words)

        # Scale by the fallback weight before normalizing
        return self.normalize_score(overlap_ratio * self._keyword_fallback_weight)

    async def score_batch(
        self,
        contexts: list[BaseContext],
        query: str,
        **kwargs: Any,
    ) -> list[float]:
        """
        Score multiple contexts.

        Contexts are scored one after another, so at most one Knowledge
        Base call is in flight at a time.

        Args:
            contexts: Contexts to score
            query: Query to score against
            **kwargs: Additional parameters forwarded to ``score``

        Returns:
            List of scores (same order as input)
        """
        return [await self.score(context, query, **kwargs) for context in contexts]