feat(memory): add semantic memory implementation (Issue #91)

Implements semantic memory with fact storage, retrieval, and verification:

Core functionality:
- SemanticMemory class for fact storage/retrieval
- Fact storage as subject-predicate-object triples
- Duplicate detection with reinforcement
- Semantic search with text-based fallback
- Entity-based retrieval
- Confidence scoring and decay
- Conflict resolution

Supporting modules:
- FactExtractor: Pattern-based fact extraction from episodes
- FactVerifier: Contradiction detection and reliability scoring

Test coverage:
- 47 unit tests covering all modules
- extraction.py: 99% coverage
- verification.py: 95% coverage
- memory.py: 78% coverage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 02:23:06 +01:00
parent 3554efe66a
commit e946787a61
8 changed files with 2447 additions and 1 deletions
--- a/backend/app/services/memory/semantic/verification.py
+++ b/backend/app/services/memory/semantic/verification.py
@@ -0,0 +1,363 @@
+# app/services/memory/semantic/verification.py
+"""
+Fact Verification.
+
+Provides utilities for verifying facts, detecting conflicts,
+and managing fact consistency.
+"""
+
+import logging
+from dataclasses import dataclass, field
+from typing import Any, ClassVar
+from uuid import UUID
+
+from sqlalchemy import and_, or_, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.memory.fact import Fact as FactModel
+from app.services.memory.types import Fact
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class VerificationResult:
+    """
+    Result of fact verification.
+
+    Produced by FactVerifier.verify_fact; the list fields default to fresh
+    empty lists so instances never share state.
+    """
+
+    # verify_fact sets this to False when at least one conflict is found.
+    is_valid: bool
+    # Net suggested change to the fact's confidence: verify_fact lowers it
+    # for conflicts and raises it for supporting facts.
+    confidence_adjustment: float = 0.0
+    # Conflicts detected against existing facts.
+    conflicts: list["FactConflict"] = field(default_factory=list)
+    # Existing facts that back the verified fact.
+    supporting_facts: list[Fact] = field(default_factory=list)
+    # Human-readable notes describing what was found.
+    messages: list[str] = field(default_factory=list)
+
+
+@dataclass
+class FactConflict:
+    """Represents a conflict between two facts."""
+
+    fact_a_id: UUID
+    fact_b_id: UUID
+    conflict_type: str  # "contradiction", "superseded", "partial_overlap"
+    description: str
+    suggested_resolution: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "fact_a_id": str(self.fact_a_id),
+            "fact_b_id": str(self.fact_b_id),
+            "conflict_type": self.conflict_type,
+            "description": self.description,
+            "suggested_resolution": self.suggested_resolution,
+        }
+
+
+class FactVerifier:
+    """
+    Verifies facts and detects conflicts.
+
+    Provides methods to:
+    - Check if a fact conflicts with existing facts
+    - Find supporting evidence for a fact
+    - Detect contradictions in the fact base
+    """
+
+    # Predicates that are opposites/contradictions
+    CONTRADICTORY_PREDICATES: ClassVar[set[tuple[str, str]]] = {
+        ("uses", "does_not_use"),
+        ("requires", "does_not_require"),
+        ("is_a", "is_not_a"),
+        ("causes", "prevents"),
+        ("allows", "prevents"),
+        ("supports", "does_not_support"),
+        ("best_practice", "anti_pattern"),
+    }
+
+    def __init__(self, session: AsyncSession) -> None:
+        """
+        Initialize verifier with database session.
+
+        Args:
+            session: Async SQLAlchemy session; every query issued by this
+                verifier is executed through it
+        """
+        self._session = session
+
+    async def verify_fact(
+        self,
+        subject: str,
+        predicate: str,
+        obj: str,
+        project_id: UUID | None = None,
+    ) -> VerificationResult:
+        """
+        Verify a fact against existing facts.
+
+        Args:
+            subject: Fact subject
+            predicate: Fact predicate
+            obj: Fact object
+            project_id: Optional project scope
+
+        Returns:
+            VerificationResult with verification details
+        """
+        result = VerificationResult(is_valid=True)
+
+        # Check for direct contradictions
+        conflicts = await self._find_contradictions(
+            subject=subject,
+            predicate=predicate,
+            obj=obj,
+            project_id=project_id,
+        )
+        result.conflicts = conflicts
+
+        if conflicts:
+            result.is_valid = False
+            result.messages.append(f"Found {len(conflicts)} conflicting fact(s)")
+            # Reduce confidence based on conflicts
+            result.confidence_adjustment = -0.1 * len(conflicts)
+
+        # Find supporting facts
+        supporting = await self._find_supporting_facts(
+            subject=subject,
+            predicate=predicate,
+            project_id=project_id,
+        )
+        result.supporting_facts = supporting
+
+        if supporting:
+            result.messages.append(f"Found {len(supporting)} supporting fact(s)")
+            # Boost confidence based on support
+            result.confidence_adjustment += 0.05 * min(len(supporting), 3)
+
+        return result
+
+    async def _find_contradictions(
+        self,
+        subject: str,
+        predicate: str,
+        obj: str,
+        project_id: UUID | None = None,
+    ) -> list[FactConflict]:
+        """Find facts that contradict the given fact."""
+        conflicts: list[FactConflict] = []
+
+        # Find opposite predicates
+        opposite_predicates = self._get_opposite_predicates(predicate)
+
+        if not opposite_predicates:
+            return conflicts
+
+        # Search for contradicting facts
+        query = select(FactModel).where(
+            and_(
+                FactModel.subject == subject,
+                FactModel.predicate.in_(opposite_predicates),
+            )
+        )
+
+        if project_id is not None:
+            query = query.where(
+                or_(
+                    FactModel.project_id == project_id,
+                    FactModel.project_id.is_(None),
+                )
+            )
+
+        result = await self._session.execute(query)
+        models = list(result.scalars().all())
+
+        for model in models:
+            conflicts.append(
+                FactConflict(
+                    fact_a_id=model.id,  # type: ignore[arg-type]
+                    fact_b_id=UUID(
+                        "00000000-0000-0000-0000-000000000000"
+                    ),  # Placeholder for new fact
+                    conflict_type="contradiction",
+                    description=(
+                        f"'{subject} {predicate} {obj}' contradicts "
+                        f"'{model.subject} {model.predicate} {model.object}'"
+                    ),
+                    suggested_resolution="Keep fact with higher confidence",
+                )
+            )
+
+        return conflicts
+
+    def _get_opposite_predicates(self, predicate: str) -> list[str]:
+        """Get predicates that are opposite to the given predicate."""
+        opposites: list[str] = []
+
+        for pair in self.CONTRADICTORY_PREDICATES:
+            if predicate in pair:
+                opposites.extend(p for p in pair if p != predicate)
+
+        return opposites
+
+    async def _find_supporting_facts(
+        self,
+        subject: str,
+        predicate: str,
+        project_id: UUID | None = None,
+    ) -> list[Fact]:
+        """Find facts that support the given fact."""
+        # Find facts with same subject and predicate
+        query = (
+            select(FactModel)
+            .where(
+                and_(
+                    FactModel.subject == subject,
+                    FactModel.predicate == predicate,
+                    FactModel.confidence >= 0.5,
+                )
+            )
+            .limit(10)
+        )
+
+        if project_id is not None:
+            query = query.where(
+                or_(
+                    FactModel.project_id == project_id,
+                    FactModel.project_id.is_(None),
+                )
+            )
+
+        result = await self._session.execute(query)
+        models = list(result.scalars().all())
+
+        return [self._model_to_fact(m) for m in models]
+
+    async def find_all_conflicts(
+        self,
+        project_id: UUID | None = None,
+    ) -> list[FactConflict]:
+        """
+        Find all conflicts in the fact base.
+
+        Args:
+            project_id: Optional project scope
+
+        Returns:
+            List of all detected conflicts
+        """
+        conflicts: list[FactConflict] = []
+
+        # Get all facts
+        query = select(FactModel)
+        if project_id is not None:
+            query = query.where(
+                or_(
+                    FactModel.project_id == project_id,
+                    FactModel.project_id.is_(None),
+                )
+            )
+
+        result = await self._session.execute(query)
+        models = list(result.scalars().all())
+
+        # Check each pair for conflicts
+        for i, fact_a in enumerate(models):
+            for fact_b in models[i + 1 :]:
+                conflict = self._check_pair_conflict(fact_a, fact_b)
+                if conflict:
+                    conflicts.append(conflict)
+
+        logger.info(f"Found {len(conflicts)} conflicts in fact base")
+
+        return conflicts
+
+    def _check_pair_conflict(
+        self,
+        fact_a: FactModel,
+        fact_b: FactModel,
+    ) -> FactConflict | None:
+        """Check if two facts conflict."""
+        # Same subject?
+        if fact_a.subject != fact_b.subject:
+            return None
+
+        # Contradictory predicates?
+        opposite = self._get_opposite_predicates(fact_a.predicate)  # type: ignore[arg-type]
+        if fact_b.predicate not in opposite:
+            return None
+
+        return FactConflict(
+            fact_a_id=fact_a.id,  # type: ignore[arg-type]
+            fact_b_id=fact_b.id,  # type: ignore[arg-type]
+            conflict_type="contradiction",
+            description=(
+                f"'{fact_a.subject} {fact_a.predicate} {fact_a.object}' "
+                f"contradicts '{fact_b.subject} {fact_b.predicate} {fact_b.object}'"
+            ),
+            suggested_resolution="Deprecate fact with lower confidence",
+        )
+
+    async def get_fact_reliability_score(
+        self,
+        fact_id: UUID,
+    ) -> float:
+        """
+        Calculate a reliability score for a fact.
+
+        Based on:
+        - Confidence score
+        - Number of reinforcements
+        - Number of supporting facts
+        - Absence of conflicts
+
+        Args:
+            fact_id: Fact to score
+
+        Returns:
+            Reliability score (0.0 to 1.0)
+        """
+        query = select(FactModel).where(FactModel.id == fact_id)
+        result = await self._session.execute(query)
+        model = result.scalar_one_or_none()
+
+        if model is None:
+            return 0.0
+
+        # Base score from confidence - explicitly typed to avoid Column type issues
+        score: float = float(model.confidence)
+
+        # Boost for reinforcements (diminishing returns)
+        reinforcement_boost = min(0.2, float(model.reinforcement_count) * 0.02)
+        score += reinforcement_boost
+
+        # Find supporting facts
+        supporting = await self._find_supporting_facts(
+            subject=model.subject,  # type: ignore[arg-type]
+            predicate=model.predicate,  # type: ignore[arg-type]
+            project_id=model.project_id,  # type: ignore[arg-type]
+        )
+        support_boost = min(0.1, len(supporting) * 0.02)
+        score += support_boost
+
+        # Check for conflicts
+        conflicts = await self._find_contradictions(
+            subject=model.subject,  # type: ignore[arg-type]
+            predicate=model.predicate,  # type: ignore[arg-type]
+            obj=model.object,  # type: ignore[arg-type]
+            project_id=model.project_id,  # type: ignore[arg-type]
+        )
+        conflict_penalty = min(0.3, len(conflicts) * 0.1)
+        score -= conflict_penalty
+
+        # Clamp to valid range
+        return max(0.0, min(1.0, score))
+
+    def _model_to_fact(self, model: FactModel) -> Fact:
+        """Convert SQLAlchemy model to Fact dataclass."""
+        return Fact(
+            id=model.id,  # type: ignore[arg-type]
+            project_id=model.project_id,  # type: ignore[arg-type]
+            subject=model.subject,  # type: ignore[arg-type]
+            predicate=model.predicate,  # type: ignore[arg-type]
+            object=model.object,  # type: ignore[arg-type]
+            confidence=model.confidence,  # type: ignore[arg-type]
+            source_episode_ids=model.source_episode_ids or [],  # type: ignore[arg-type]
+            first_learned=model.first_learned,  # type: ignore[arg-type]
+            last_reinforced=model.last_reinforced,  # type: ignore[arg-type]
+            reinforcement_count=model.reinforcement_count,  # type: ignore[arg-type]
+            embedding=None,
+            created_at=model.created_at,  # type: ignore[arg-type]
+            updated_at=model.updated_at,  # type: ignore[arg-type]
+        )