- Replace hard-coded limits with configurable settings (e.g., cache memory size, truncation strategy, relevance settings).
- Optimize parallel execution in token counting, scoring, and reranking for source diversity.
- Improve caching logic:
  - Add per-context locks for safe parallel scoring.
  - Reuse precomputed fingerprints for cache efficiency.
- Make truncation, scoring, and ranker behaviors fully configurable via settings.
- Add support for middle truncation, context hash-based hashing, and dynamic token limiting.
- Refactor methods for scalability and better error handling.

Tests: Updated all affected components with additional test cases.
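For reference, here is a minimal usage sketch of the configurable scorer. The import path and the way the `contexts` list of `BaseContext` instances is built are assumptions; the constructor arguments and the `rank()` / `update_weights()` calls match the module shown below.

    # Illustrative sketch only: the import path and context construction are assumed.
    import asyncio

    from app.services.context.scoring.composite import CompositeScorer  # hypothetical path

    async def demo(contexts) -> None:
        # Explicit weight overrides take precedence over the settings-provided defaults.
        scorer = CompositeScorer(
            relevance_weight=0.5,
            recency_weight=0.3,
            priority_weight=0.2,
        )

        # Score, filter by threshold, and rank (highest composite score first).
        top = await scorer.rank(
            contexts, query="database migration errors", limit=5, min_score=0.2
        )
        for item in top:
            print(item.context.id, round(item.composite_score, 3))

        # Weights can also be adjusted later; values are clamped to [0.0, 1.0].
        scorer.update_weights(recency=0.5)

    # asyncio.run(demo(contexts))  # with `contexts` assembled elsewhere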
"""
|
|
Composite Scorer for Context Management.
|
|
|
|
Combines multiple scoring strategies with configurable weights.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import TYPE_CHECKING, Any
|
|
from weakref import WeakValueDictionary
|
|
|
|
from ..config import ContextSettings, get_context_settings
|
|
from ..types import BaseContext
|
|
from .priority import PriorityScorer
|
|
from .recency import RecencyScorer
|
|
from .relevance import RelevanceScorer
|
|
|
|
if TYPE_CHECKING:
|
|
from app.services.mcp.client_manager import MCPClientManager
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ScoredContext:
    """Context with computed scores."""

    context: BaseContext
    composite_score: float
    relevance_score: float = 0.0
    recency_score: float = 0.0
    priority_score: float = 0.0

    def __lt__(self, other: "ScoredContext") -> bool:
        """Enable sorting by composite score."""
        return self.composite_score < other.composite_score

    def __gt__(self, other: "ScoredContext") -> bool:
        """Enable sorting by composite score."""
        return self.composite_score > other.composite_score
class CompositeScorer:
    """
    Combines multiple scoring strategies.

    Weights:
    - relevance: How well content matches the query
    - recency: How recent the content is
    - priority: Explicit priority assignments
    """

    def __init__(
        self,
        mcp_manager: "MCPClientManager | None" = None,
        settings: ContextSettings | None = None,
        relevance_weight: float | None = None,
        recency_weight: float | None = None,
        priority_weight: float | None = None,
    ) -> None:
        """
        Initialize composite scorer.

        Args:
            mcp_manager: MCP manager for semantic scoring
            settings: Context settings (uses default if None)
            relevance_weight: Override relevance weight
            recency_weight: Override recency weight
            priority_weight: Override priority weight
        """
        self._settings = settings or get_context_settings()
        weights = self._settings.get_scoring_weights()

        self._relevance_weight = (
            relevance_weight if relevance_weight is not None else weights["relevance"]
        )
        self._recency_weight = (
            recency_weight if recency_weight is not None else weights["recency"]
        )
        self._priority_weight = (
            priority_weight if priority_weight is not None else weights["priority"]
        )

        # Initialize scorers
        self._relevance_scorer = RelevanceScorer(
            mcp_manager=mcp_manager,
            weight=self._relevance_weight,
        )
        self._recency_scorer = RecencyScorer(weight=self._recency_weight)
        self._priority_scorer = PriorityScorer(weight=self._priority_weight)

        # Per-context locks to prevent race conditions during parallel scoring
        # Uses WeakValueDictionary so locks are garbage collected when not in use
        self._context_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
        self._locks_lock = asyncio.Lock()  # Lock to protect _context_locks access
    def set_mcp_manager(self, mcp_manager: "MCPClientManager") -> None:
        """Set MCP manager for semantic scoring."""
        self._relevance_scorer.set_mcp_manager(mcp_manager)

    @property
    def weights(self) -> dict[str, float]:
        """Get current scoring weights."""
        return {
            "relevance": self._relevance_weight,
            "recency": self._recency_weight,
            "priority": self._priority_weight,
        }

    def update_weights(
        self,
        relevance: float | None = None,
        recency: float | None = None,
        priority: float | None = None,
    ) -> None:
        """
        Update scoring weights.

        Args:
            relevance: New relevance weight
            recency: New recency weight
            priority: New priority weight
        """
        if relevance is not None:
            self._relevance_weight = max(0.0, min(1.0, relevance))
            self._relevance_scorer.weight = self._relevance_weight

        if recency is not None:
            self._recency_weight = max(0.0, min(1.0, recency))
            self._recency_scorer.weight = self._recency_weight

        if priority is not None:
            self._priority_weight = max(0.0, min(1.0, priority))
            self._priority_scorer.weight = self._priority_weight
    async def _get_context_lock(self, context_id: str) -> asyncio.Lock:
        """
        Get or create a lock for a specific context.

        Thread-safe access to per-context locks prevents race conditions
        when the same context is scored concurrently.

        Args:
            context_id: The context ID to get a lock for

        Returns:
            asyncio.Lock for the context
        """
        # Fast path: check if lock exists without acquiring main lock
        if context_id in self._context_locks:
            lock = self._context_locks.get(context_id)
            if lock is not None:
                return lock

        # Slow path: create lock while holding main lock
        async with self._locks_lock:
            # Double-check after acquiring lock
            if context_id in self._context_locks:
                lock = self._context_locks.get(context_id)
                if lock is not None:
                    return lock

            # Create new lock
            new_lock = asyncio.Lock()
            self._context_locks[context_id] = new_lock
            return new_lock
    async def score(
        self,
        context: BaseContext,
        query: str,
        **kwargs: Any,
    ) -> float:
        """
        Compute composite score for a context.

        Args:
            context: Context to score
            query: Query to score against
            **kwargs: Additional scoring parameters

        Returns:
            Composite score between 0.0 and 1.0
        """
        scored = await self.score_with_details(context, query, **kwargs)
        return scored.composite_score
    async def score_with_details(
        self,
        context: BaseContext,
        query: str,
        **kwargs: Any,
    ) -> ScoredContext:
        """
        Compute composite score with individual scores.

        Uses per-context locking to prevent race conditions when the same
        context is scored concurrently in parallel scoring operations.

        Args:
            context: Context to score
            query: Query to score against
            **kwargs: Additional scoring parameters

        Returns:
            ScoredContext with all scores
        """
        # Get lock for this specific context to prevent race conditions
        context_lock = await self._get_context_lock(context.id)

        async with context_lock:
            # Check if context already has a score (inside lock to prevent races)
            if context._score is not None:
                return ScoredContext(
                    context=context,
                    composite_score=context._score,
                )

            # Compute individual scores in parallel
            relevance_task = self._relevance_scorer.score(context, query, **kwargs)
            recency_task = self._recency_scorer.score(context, query, **kwargs)
            priority_task = self._priority_scorer.score(context, query, **kwargs)

            relevance_score, recency_score, priority_score = await asyncio.gather(
                relevance_task, recency_task, priority_task
            )

            # Compute weighted composite
            total_weight = (
                self._relevance_weight + self._recency_weight + self._priority_weight
            )

            if total_weight > 0:
                composite = (
                    relevance_score * self._relevance_weight
                    + recency_score * self._recency_weight
                    + priority_score * self._priority_weight
                ) / total_weight
            else:
                composite = 0.0

            # Cache the score on the context (now safe - inside lock)
            context._score = composite

            return ScoredContext(
                context=context,
                composite_score=composite,
                relevance_score=relevance_score,
                recency_score=recency_score,
                priority_score=priority_score,
            )
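    # Illustrative arithmetic (not part of the original file): with weights
    # relevance=0.5, recency=0.3, priority=0.2 and individual scores of
    # 0.8, 0.5, and 1.0, the weighted composite above works out to
    #   (0.8 * 0.5 + 0.5 * 0.3 + 1.0 * 0.2) / (0.5 + 0.3 + 0.2) = 0.75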
    async def score_batch(
        self,
        contexts: list[BaseContext],
        query: str,
        parallel: bool = True,
        **kwargs: Any,
    ) -> list[ScoredContext]:
        """
        Score multiple contexts.

        Args:
            contexts: Contexts to score
            query: Query to score against
            parallel: Whether to score in parallel
            **kwargs: Additional scoring parameters

        Returns:
            List of ScoredContext (same order as input)
        """
        if parallel:
            tasks = [
                self.score_with_details(ctx, query, **kwargs) for ctx in contexts
            ]
            return await asyncio.gather(*tasks)
        else:
            results = []
            for ctx in contexts:
                scored = await self.score_with_details(ctx, query, **kwargs)
                results.append(scored)
            return results
    async def rank(
        self,
        contexts: list[BaseContext],
        query: str,
        limit: int | None = None,
        min_score: float = 0.0,
        **kwargs: Any,
    ) -> list[ScoredContext]:
        """
        Score and rank contexts.

        Args:
            contexts: Contexts to rank
            query: Query to rank against
            limit: Maximum number of results
            min_score: Minimum score threshold
            **kwargs: Additional scoring parameters

        Returns:
            Sorted list of ScoredContext (highest first)
        """
        # Score all contexts
        scored = await self.score_batch(contexts, query, **kwargs)

        # Filter by minimum score
        if min_score > 0:
            scored = [s for s in scored if s.composite_score >= min_score]

        # Sort by score (highest first)
        scored.sort(reverse=True)

        # Apply limit
        if limit is not None:
            scored = scored[:limit]

        return scored