feat(memory): add semantic memory implementation (Issue #91)

Implements semantic memory with fact storage, retrieval, and verification: Core functionality: - SemanticMemory class for fact storage/retrieval - Fact storage as subject-predicate-object triples - Duplicate detection with reinforcement - Semantic search with text-based fallback - Entity-based retrieval - Confidence scoring and decay - Conflict resolution Supporting modules: - FactExtractor: Pattern-based fact extraction from episodes - FactVerifier: Contradiction detection and reliability scoring Test coverage: - 47 unit tests covering all modules - extraction.py: 99% coverage - verification.py: 95% coverage - memory.py: 78% coverage 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 02:23:06 +01:00
parent 3554efe66a
commit e946787a61
8 changed files with 2447 additions and 1 deletions
--- a/backend/app/services/memory/semantic/extraction.py
+++ b/backend/app/services/memory/semantic/extraction.py
@@ -0,0 +1,313 @@
+# app/services/memory/semantic/extraction.py
+"""
+Fact Extraction from Episodes.
+
+Provides utilities for extracting semantic facts (subject-predicate-object triples)
+from episodic memories and other text sources.
+"""
+
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import Any, ClassVar
+
+from app.services.memory.types import Episode, FactCreate, Outcome
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ExtractionContext:
+    """
+    Settings that scope and bound a single fact-extraction run.
+
+    Attributes:
+        project_id: Optional project identifier carried for callers.
+        source_episode_id: Optional identifier of the episode being processed.
+        min_confidence: Facts scoring below this value are discarded.
+        max_facts_per_source: Maximum number of facts kept per episode/text.
+    """
+
+    # Opaque scoping identifiers; any type is accepted.
+    project_id: Any | None = None
+    source_episode_id: Any | None = None
+
+    # Confidence threshold applied after extraction.
+    min_confidence: float = 0.5
+
+    # Cap applied after the confidence filter.
+    max_facts_per_source: int = 10
+
+
+@dataclass
+class ExtractedFact:
+    """
+    Candidate subject-predicate-object triple produced by extraction.
+
+    Attributes:
+        subject: Left-hand side of the triple.
+        predicate: Relation name linking subject and object.
+        object: Right-hand side of the triple.
+        confidence: Score used by the extractor's confidence filter.
+        source_text: Text the fact was derived from (empty by default).
+        metadata: Free-form extra data; a fresh dict per instance.
+    """
+
+    subject: str
+    predicate: str
+    object: str
+    confidence: float
+    source_text: str = ""
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+    def to_fact_create(
+        self,
+        project_id: Any | None = None,
+        source_episode_ids: list[Any] | None = None,
+    ) -> FactCreate:
+        """
+        Build the ``FactCreate`` payload used to store this fact.
+
+        Only the triple and its confidence are forwarded; ``source_text``
+        and ``metadata`` are not part of the payload.
+
+        Args:
+            project_id: Optional project to attach the fact to.
+            source_episode_ids: Episodes the fact came from; a missing or
+                empty value becomes a new empty list.
+
+        Returns:
+            A ``FactCreate`` populated from this fact.
+        """
+        episode_ids = source_episode_ids if source_episode_ids else []
+        payload = {
+            "subject": self.subject,
+            "predicate": self.predicate,
+            "object": self.object,
+            "confidence": self.confidence,
+            "project_id": project_id,
+            "source_episode_ids": episode_ids,
+        }
+        return FactCreate(**payload)
+
+
+class FactExtractor:
+    """
+    Rule-based extractor of subject-predicate-object facts.
+
+    Facts are produced from an episode's task description, lessons learned
+    and outcome details, or from arbitrary text. Matching is purely pattern
+    based; in production this would be replaced or augmented with LLM-based
+    extraction for better accuracy.
+    """
+
+    # Predicate name -> regex fragment that recognises it inside a sentence.
+    # Insertion order is the match priority: the first predicate whose
+    # pattern is found in a sentence wins.
+    PREDICATE_PATTERNS: ClassVar[dict[str, str]] = {
+        "uses": r"(?:uses?|using|utilizes?)",
+        "requires": r"(?:requires?|needs?|depends?\s+on)",
+        "is_a": r"(?:is\s+a|is\s+an|are\s+a|are)",
+        "has": r"(?:has|have|contains?)",
+        "part_of": r"(?:part\s+of|belongs?\s+to|member\s+of)",
+        "causes": r"(?:causes?|leads?\s+to|results?\s+in)",
+        "prevents": r"(?:prevents?|avoids?|stops?)",
+        "solves": r"(?:solves?|fixes?|resolves?)",
+    }
+
+    # Lesson phrasings; each is matched at the start of the lesson text.
+    _CONDITIONAL_LESSON: ClassVar[re.Pattern[str]] = re.compile(
+        r"(?:when|if)\s+(.+?),\s*(.+)",
+        re.IGNORECASE,
+    )
+    _ALWAYS_LESSON: ClassVar[re.Pattern[str]] = re.compile(
+        r"(?:always)\s+(.+)",
+        re.IGNORECASE,
+    )
+    _NEVER_LESSON: ClassVar[re.Pattern[str]] = re.compile(
+        r"(?:never|avoid)\s+(.+)",
+        re.IGNORECASE,
+    )
+
+    # Texts, sentences and lessons shorter than this are ignored.
+    _MIN_TEXT_LENGTH: ClassVar[int] = 10
+
+    def __init__(self) -> None:
+        """
+        Compile every predicate pattern, anchored on word boundaries.
+
+        Without the ``\\b`` anchors ``uses?`` matches inside "causes" and
+        ``has`` inside "hash", which yields a truncated subject and the
+        wrong predicate. The trailing anchor also makes "is an" win over
+        "is a" so the object no longer starts with a stray "n".
+        """
+        self._compiled_patterns = {
+            pred: re.compile(rf"\b{pattern}\b", re.IGNORECASE)
+            for pred, pattern in self.PREDICATE_PATTERNS.items()
+        }
+
+    @staticmethod
+    def _filter_and_limit(
+        facts: list[ExtractedFact],
+        ctx: ExtractionContext,
+    ) -> list[ExtractedFact]:
+        """Drop facts below the confidence threshold, then cap the count."""
+        kept = [fact for fact in facts if fact.confidence >= ctx.min_confidence]
+        # Clamp at zero: a negative limit would otherwise slice from the end.
+        return kept[: max(ctx.max_facts_per_source, 0)]
+
+    def extract_from_episode(
+        self,
+        episode: Episode,
+        context: ExtractionContext | None = None,
+    ) -> list[ExtractedFact]:
+        """
+        Extract facts from an episode.
+
+        Facts are gathered in this order: task description, each lesson
+        learned, then the outcome. The combined list is filtered by
+        confidence and truncated to the configured maximum.
+
+        Args:
+            episode: Episode to extract from
+            context: Optional extraction context; defaults are used if omitted
+
+        Returns:
+            List of extracted facts
+        """
+        ctx = context or ExtractionContext()
+
+        # Extract from task description
+        facts: list[ExtractedFact] = self._extract_from_text(
+            episode.task_description,
+            source_prefix=episode.task_type,
+        )
+
+        # Extract from lessons learned (a missing list is treated as empty)
+        for lesson in episode.lessons_learned or []:
+            facts.extend(self._extract_from_lesson(lesson, episode))
+
+        # Extract outcome-based facts
+        facts.extend(self._extract_outcome_facts(episode))
+
+        facts = self._filter_and_limit(facts, ctx)
+
+        logger.debug("Extracted %d facts from episode %s", len(facts), episode.id)
+
+        return facts
+
+    def _extract_from_text(
+        self,
+        text: str,
+        source_prefix: str = "",
+    ) -> list[ExtractedFact]:
+        """
+        Extract facts from free-form text using pattern matching.
+
+        The text is split on runs of ``.``, ``!`` and ``?``. For each
+        sentence the predicates are tried in priority order; the text before
+        the match becomes the subject and the text after it the object. At
+        most one fact is produced per sentence.
+
+        Args:
+            text: Text to scan; empty or very short text yields no facts
+            source_prefix: Accepted for call-site compatibility; not used
+
+        Returns:
+            List of extracted facts, each with confidence 0.6
+        """
+        facts: list[ExtractedFact] = []
+
+        if not text or len(text) < self._MIN_TEXT_LENGTH:
+            return facts
+
+        for raw_sentence in re.split(r"[.!?]+", text):
+            sentence = raw_sentence.strip()
+            if len(sentence) < self._MIN_TEXT_LENGTH:
+                continue
+
+            for predicate, pattern in self._compiled_patterns.items():
+                match = pattern.search(sentence)
+                if match is None:
+                    continue
+
+                subject = sentence[: match.start()].strip()
+                obj = sentence[match.end() :].strip()
+
+                # Require a non-trivial subject and object; otherwise let a
+                # lower-priority predicate try the same sentence.
+                if len(subject) > 2 and len(obj) > 2:
+                    facts.append(
+                        ExtractedFact(
+                            subject=subject[:200],  # Limit length
+                            predicate=predicate,
+                            object=obj[:500],
+                            confidence=0.6,  # Medium confidence for pattern matching
+                            source_text=sentence,
+                        )
+                    )
+                    break  # One fact per sentence
+
+        return facts
+
+    def _extract_from_lesson(
+        self,
+        lesson: str,
+        episode: Episode,
+    ) -> list[ExtractedFact]:
+        """
+        Extract facts from a lesson learned.
+
+        Every usable lesson yields a ``lesson_learned`` fact. Lessons phrased
+        as "When/If X, Y", "Always X" or "Never/Avoid X" additionally yield a
+        ``requires_action``, ``best_practice`` or ``anti_pattern`` fact.
+
+        Args:
+            lesson: Lesson text; surrounding whitespace is ignored
+            episode: Episode the lesson belongs to
+
+        Returns:
+            List of extracted facts (empty for blank or very short lessons)
+        """
+        facts: list[ExtractedFact] = []
+
+        # Strip first: the phrasing patterns are anchored at the start, so
+        # leading whitespace would otherwise prevent them from matching.
+        lesson_text = lesson.strip() if lesson else ""
+        if len(lesson_text) < self._MIN_TEXT_LENGTH:
+            return facts
+
+        task_type = episode.task_type
+
+        # Direct lesson fact
+        facts.append(
+            ExtractedFact(
+                subject=task_type,
+                predicate="lesson_learned",
+                object=lesson_text,
+                confidence=0.8,  # High confidence for explicit lessons
+                source_text=lesson_text,
+                metadata={"outcome": episode.outcome.value},
+            )
+        )
+
+        # "When X, do Y" / "If X, do Y"
+        conditional_match = self._CONDITIONAL_LESSON.match(lesson_text)
+        if conditional_match:
+            condition, action = conditional_match.groups()
+            facts.append(
+                ExtractedFact(
+                    subject=condition.strip(),
+                    predicate="requires_action",
+                    object=action.strip(),
+                    confidence=0.7,
+                    source_text=lesson_text,
+                )
+            )
+
+        # "Always X"
+        always_match = self._ALWAYS_LESSON.match(lesson_text)
+        if always_match:
+            facts.append(
+                ExtractedFact(
+                    subject=task_type,
+                    predicate="best_practice",
+                    object=always_match.group(1).strip(),
+                    confidence=0.85,
+                    source_text=lesson_text,
+                )
+            )
+
+        # "Never X" / "Avoid X"
+        never_match = self._NEVER_LESSON.match(lesson_text)
+        if never_match:
+            facts.append(
+                ExtractedFact(
+                    subject=task_type,
+                    predicate="anti_pattern",
+                    object=never_match.group(1).strip(),
+                    confidence=0.85,
+                    source_text=lesson_text,
+                )
+            )
+
+        return facts
+
+    def _extract_outcome_facts(
+        self,
+        episode: Episode,
+    ) -> list[ExtractedFact]:
+        """
+        Extract facts based on episode outcome.
+
+        A successful episode with outcome details yields a
+        ``successful_approach`` fact and a failed one a ``known_failure_mode``
+        fact. Other outcomes, or missing details, yield nothing.
+
+        Args:
+            episode: Episode whose outcome is inspected
+
+        Returns:
+            List with at most one fact
+        """
+        details = episode.outcome_details
+        if not details:
+            return []
+
+        if episode.outcome == Outcome.SUCCESS:
+            predicate, confidence = "successful_approach", 0.75
+        elif episode.outcome == Outcome.FAILURE:
+            # High confidence for failures
+            predicate, confidence = "known_failure_mode", 0.8
+        else:
+            return []
+
+        return [
+            ExtractedFact(
+                subject=episode.task_type,
+                predicate=predicate,
+                object=details[:500],
+                confidence=confidence,
+                source_text=details,
+            )
+        ]
+
+    def extract_from_text(
+        self,
+        text: str,
+        context: ExtractionContext | None = None,
+    ) -> list[ExtractedFact]:
+        """
+        Extract facts from arbitrary text.
+
+        Args:
+            text: Text to extract from
+            context: Optional extraction context; defaults are used if omitted
+
+        Returns:
+            List of extracted facts, filtered by confidence and capped at
+            the context's maximum
+        """
+        ctx = context or ExtractionContext()
+        return self._filter_and_limit(self._extract_from_text(text), ctx)
+
+
+# Module-wide extractor, created on first request.
+_extractor: FactExtractor | None = None
+
+
+def get_fact_extractor() -> FactExtractor:
+    """
+    Return the shared ``FactExtractor``, creating it on the first call.
+
+    Returns:
+        The module-level extractor instance; later calls return the same
+        object.
+    """
+    global _extractor
+    if _extractor is not None:
+        return _extractor
+    _extractor = FactExtractor()
+    return _extractor