syndarix/backend/app/services/context/scoring/composite.py

"""
Composite Scorer for Context Management.

Combines multiple scoring strategies with configurable weights.
"""

import asyncio
import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

from ..config import ContextSettings, get_context_settings
from ..types import BaseContext
from .priority import PriorityScorer
from .recency import RecencyScorer
from .relevance import RelevanceScorer

if TYPE_CHECKING:
    from app.services.mcp.client_manager import MCPClientManager

logger = logging.getLogger(__name__)


@dataclass
class ScoredContext:
    """
    A context bundled with the scores computed for it.

    The ``<`` and ``>`` operators look only at ``composite_score``, which
    lets a list of instances be sorted by overall score. Equality is the
    dataclass-generated field-by-field comparison.
    """

    # The context object that was scored.
    context: BaseContext
    # Overall score; the only value consulted by ``<`` and ``>``.
    composite_score: float
    # Component scores; each defaults to 0.0 when not supplied.
    relevance_score: float = 0.0
    recency_score: float = 0.0
    priority_score: float = 0.0

    def __gt__(self, other: "ScoredContext") -> bool:
        """Return True when this composite score is above ``other``'s."""
        return other.composite_score < self.composite_score

    def __lt__(self, other: "ScoredContext") -> bool:
        """Return True when this composite score is below ``other``'s."""
        return other.composite_score > self.composite_score


class CompositeScorer:
    """
    Combines multiple scoring strategies.

    Weights:
    - relevance: How well content matches the query
    - recency: How recent the content is
    - priority: Explicit priority assignments
    """

    def __init__(
        self,
        mcp_manager: "MCPClientManager | None" = None,
        settings: ContextSettings | None = None,
        relevance_weight: float | None = None,
        recency_weight: float | None = None,
        priority_weight: float | None = None,
    ) -> None:
        """
        Initialize composite scorer.

        Args:
            mcp_manager: MCP manager for semantic scoring
            settings: Context settings (uses default if None)
            relevance_weight: Override relevance weight
            recency_weight: Override recency weight
            priority_weight: Override priority weight
        """
        self._settings = settings or get_context_settings()
        weights = self._settings.get_scoring_weights()

        self._relevance_weight = (
            relevance_weight if relevance_weight is not None else weights["relevance"]
        )
        self._recency_weight = (
            recency_weight if recency_weight is not None else weights["recency"]
        )
        self._priority_weight = (
            priority_weight if priority_weight is not None else weights["priority"]
        )

        # Initialize scorers
        self._relevance_scorer = RelevanceScorer(
            mcp_manager=mcp_manager,
            weight=self._relevance_weight,
        )
        self._recency_scorer = RecencyScorer(weight=self._recency_weight)
        self._priority_scorer = PriorityScorer(weight=self._priority_weight)

    def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
        """Set MCP manager for semantic scoring."""
        self._relevance_scorer.set_mcp_manager(mcp_manager)

    @property
    def weights(self) -> dict[str, float]:
        """Get current scoring weights."""
        return {
            "relevance": self._relevance_weight,
            "recency": self._recency_weight,
            "priority": self._priority_weight,
        }

    def update_weights(
        self,
        relevance: float | None = None,
        recency: float | None = None,
        priority: float | None = None,
    ) -> None:
        """
        Update scoring weights.

        Args:
            relevance: New relevance weight
            recency: New recency weight
            priority: New priority weight
        """
        if relevance is not None:
            self._relevance_weight = max(0.0, min(1.0, relevance))
            self._relevance_scorer.weight = self._relevance_weight

        if recency is not None:
            self._recency_weight = max(0.0, min(1.0, recency))
            self._recency_scorer.weight = self._recency_weight

        if priority is not None:
            self._priority_weight = max(0.0, min(1.0, priority))
            self._priority_scorer.weight = self._priority_weight

    async def score(
        self,
        context: BaseContext,
        query: str,
        **kwargs: Any,
    ) -> float:
        """
        Compute composite score for a context.

        Args:
            context: Context to score
            query: Query to score against
            **kwargs: Additional scoring parameters

        Returns:
            Composite score between 0.0 and 1.0
        """
        scored = await self.score_with_details(context, query, **kwargs)
        return scored.composite_score

    async def score_with_details(
        self,
        context: BaseContext,
        query: str,
        **kwargs: Any,
    ) -> ScoredContext:
        """
        Compute composite score with individual scores.

        Args:
            context: Context to score
            query: Query to score against
            **kwargs: Additional scoring parameters

        Returns:
            ScoredContext with all scores
        """
        # Check if context already has a score
        if context._score is not None:
            return ScoredContext(
                context=context,
                composite_score=context._score,
            )

        # Compute individual scores in parallel
        relevance_task = self._relevance_scorer.score(context, query, **kwargs)
        recency_task = self._recency_scorer.score(context, query, **kwargs)
        priority_task = self._priority_scorer.score(context, query, **kwargs)

        relevance_score, recency_score, priority_score = await asyncio.gather(
            relevance_task, recency_task, priority_task
        )

        # Compute weighted composite
        total_weight = (
            self._relevance_weight + self._recency_weight + self._priority_weight
        )

        if total_weight > 0:
            composite = (
                relevance_score * self._relevance_weight
                + recency_score * self._recency_weight
                + priority_score * self._priority_weight
            ) / total_weight
        else:
            composite = 0.0

        # Cache the score on the context
        context._score = composite

        return ScoredContext(
            context=context,
            composite_score=composite,
            relevance_score=relevance_score,
            recency_score=recency_score,
            priority_score=priority_score,
        )

    async def score_batch(
        self,
        contexts: list[BaseContext],
        query: str,
        parallel: bool = True,
        **kwargs: Any,
    ) -> list[ScoredContext]:
        """
        Score multiple contexts.

        Args:
            contexts: Contexts to score
            query: Query to score against
            parallel: Whether to score in parallel
            **kwargs: Additional scoring parameters

        Returns:
            List of ScoredContext (same order as input)
        """
        if parallel:
            tasks = [
                self.score_with_details(ctx, query, **kwargs) for ctx in contexts
            ]
            return await asyncio.gather(*tasks)
        else:
            results = []
            for ctx in contexts:
                scored = await self.score_with_details(ctx, query, **kwargs)
                results.append(scored)
            return results

    async def rank(
        self,
        contexts: list[BaseContext],
        query: str,
        limit: int | None = None,
        min_score: float = 0.0,
        **kwargs: Any,
    ) -> list[ScoredContext]:
        """
        Score and rank contexts.

        Args:
            contexts: Contexts to rank
            query: Query to rank against
            limit: Maximum number of results
            min_score: Minimum score threshold
            **kwargs: Additional scoring parameters

        Returns:
            Sorted list of ScoredContext (highest first)
        """
        # Score all contexts
        scored = await self.score_batch(contexts, query, **kwargs)

        # Filter by minimum score
        if min_score > 0:
            scored = [s for s in scored if s.composite_score >= min_score]

        # Sort by score (highest first)
        scored.sort(reverse=True)

        # Apply limit
        if limit is not None:
            scored = scored[:limit]

        return scored