Files
syndarix/backend/app/services/memory/semantic/extraction.py
Felipe Cardoso e946787a61 feat(memory): add semantic memory implementation (Issue #91)
Implements semantic memory with fact storage, retrieval, and verification:

Core functionality:
- SemanticMemory class for fact storage/retrieval
- Fact storage as subject-predicate-object triples
- Duplicate detection with reinforcement
- Semantic search with text-based fallback
- Entity-based retrieval
- Confidence scoring and decay
- Conflict resolution

Supporting modules:
- FactExtractor: Pattern-based fact extraction from episodes
- FactVerifier: Contradiction detection and reliability scoring

Test coverage:
- 47 unit tests covering all modules
- extraction.py: 99% coverage
- verification.py: 95% coverage
- memory.py: 78% coverage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 02:23:06 +01:00

314 lines
9.4 KiB
Python

# app/services/memory/semantic/extraction.py
"""
Fact Extraction from Episodes.
Provides utilities for extracting semantic facts (subject-predicate-object triples)
from episodic memories and other text sources.
"""
import logging
import re
from dataclasses import dataclass, field
from typing import Any, ClassVar
from app.services.memory.types import Episode, FactCreate, Outcome
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
@dataclass
class ExtractionContext:
    """Options that scope and bound a single fact-extraction call.

    Every field is optional; a default-constructed context is what the
    extractor falls back to when the caller supplies none.
    """

    # Identifier of the owning project, if any. Carried for callers; the
    # extractor code in this module does not read it.
    project_id: Any | None = None

    # Identifier of the episode the text came from, if any. Carried for
    # callers; the extractor code in this module does not read it.
    source_episode_id: Any | None = None

    # Facts whose confidence is strictly below this value are dropped.
    min_confidence: float = 0.5

    # Upper bound on how many facts one extraction call returns.
    max_facts_per_source: int = 10
@dataclass
class ExtractedFact:
    """A subject-predicate-object triple produced by extraction, not yet stored."""

    subject: str
    predicate: str
    object: str
    # Score assigned by the rule that produced the fact.
    confidence: float
    # The text fragment the fact was derived from.
    source_text: str = ""
    # Free-form extra information attached by the producing rule.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_fact_create(
        self,
        project_id: Any | None = None,
        source_episode_ids: list[Any] | None = None,
    ) -> FactCreate:
        """Build the ``FactCreate`` payload used to store this fact.

        Only the triple and its confidence are forwarded; ``source_text``
        and ``metadata`` are not part of the payload.

        Args:
            project_id: Project to attach the fact to, if any.
            source_episode_ids: Episodes the fact was derived from. A
                missing or empty value becomes a new empty list.

        Returns:
            A ``FactCreate`` populated from this fact.
        """
        episode_ids = source_episode_ids if source_episode_ids else []
        payload = {
            "subject": self.subject,
            "predicate": self.predicate,
            "object": self.object,
            "confidence": self.confidence,
            "project_id": project_id,
            "source_episode_ids": episode_ids,
        }
        return FactCreate(**payload)
class FactExtractor:
    """
    Extracts facts from episodes and text.

    This is a rule-based extractor. In production, this would be
    replaced or augmented with LLM-based extraction for better accuracy.

    Free-form text is split into sentences and each sentence is scanned for
    one of the predicate phrases in ``PREDICATE_PATTERNS``; the text before
    the phrase becomes the subject and the text after it the object.
    Episodes additionally contribute facts from their lessons learned and
    from their outcome details.
    """

    # Common predicates we can detect
    PREDICATE_PATTERNS: ClassVar[dict[str, str]] = {
        "uses": r"(?:uses?|using|utilizes?)",
        "requires": r"(?:requires?|needs?|depends?\s+on)",
        "is_a": r"(?:is\s+a|is\s+an|are\s+a|are)",
        "has": r"(?:has|have|contains?)",
        "part_of": r"(?:part\s+of|belongs?\s+to|member\s+of)",
        "causes": r"(?:causes?|leads?\s+to|results?\s+in)",
        "prevents": r"(?:prevents?|avoids?|stops?)",
        "solves": r"(?:solves?|fixes?|resolves?)",
    }

    # Inputs and sentences shorter than this are ignored.
    _MIN_TEXT_LENGTH: ClassVar[int] = 10
    # Caps applied to pattern-matched subjects and to stored objects.
    _MAX_SUBJECT_LENGTH: ClassVar[int] = 200
    _MAX_OBJECT_LENGTH: ClassVar[int] = 500

    def __init__(self) -> None:
        """Initialize extractor by compiling the predicate patterns once."""
        # Anchor every phrase on word boundaries. Without them "uses?" also
        # matches inside "causes", "has" inside "purchase" and "are" inside
        # "software", producing a wrong predicate and a chopped-up subject.
        self._compiled_patterns: dict[str, re.Pattern[str]] = {
            pred: re.compile(rf"\b{pattern}\b", re.IGNORECASE)
            for pred, pattern in self.PREDICATE_PATTERNS.items()
        }

    def extract_from_episode(
        self,
        episode: Episode,
        context: ExtractionContext | None = None,
    ) -> list[ExtractedFact]:
        """
        Extract facts from an episode.

        Facts are gathered, in this order, from the task description, from
        each lesson learned, and from the outcome details. The combined
        list is then filtered by confidence and truncated.

        Args:
            episode: Episode to extract from
            context: Optional extraction context; defaults are used if omitted

        Returns:
            List of extracted facts
        """
        ctx = context or ExtractionContext()
        facts: list[ExtractedFact] = []

        # Extract from task description
        facts.extend(
            self._extract_from_text(
                episode.task_description,
                source_prefix=episode.task_type,
            )
        )

        # Extract from lessons learned (tolerate a missing list)
        for lesson in episode.lessons_learned or []:
            facts.extend(self._extract_from_lesson(lesson, episode))

        # Extract outcome-based facts
        facts.extend(self._extract_outcome_facts(episode))

        facts = self._filter_and_limit(facts, ctx)
        # Lazy %-formatting: the message is only built if DEBUG is enabled.
        logger.debug("Extracted %d facts from episode %s", len(facts), episode.id)
        return facts

    @staticmethod
    def _filter_and_limit(
        facts: list[ExtractedFact],
        ctx: ExtractionContext,
    ) -> list[ExtractedFact]:
        """Drop facts below ``ctx.min_confidence`` and keep at most
        ``ctx.max_facts_per_source`` of the remainder, preserving order."""
        kept = [fact for fact in facts if fact.confidence >= ctx.min_confidence]
        # Clamp at zero: a negative limit would otherwise act as a negative
        # slice bound and silently drop items from the end instead.
        return kept[: max(ctx.max_facts_per_source, 0)]

    def _extract_from_text(
        self,
        text: str,
        source_prefix: str = "",
    ) -> list[ExtractedFact]:
        """Extract facts from free-form text using pattern matching.

        Args:
            text: Text to scan. Empty or very short text yields no facts.
            source_prefix: Accepted for call-site compatibility; the current
                implementation does not use it.

        Returns:
            At most one fact per sentence, in sentence order.
        """
        facts: list[ExtractedFact] = []
        if not text or len(text) < self._MIN_TEXT_LENGTH:
            return facts

        # Split into sentences
        for raw_sentence in re.split(r"[.!?]+", text):
            sentence = raw_sentence.strip()
            if len(sentence) < self._MIN_TEXT_LENGTH:
                continue

            # Patterns are tried in PREDICATE_PATTERNS order; the first one
            # that yields a usable subject and object wins, which is not
            # necessarily the leftmost predicate in the sentence.
            for predicate, pattern in self._compiled_patterns.items():
                match = pattern.search(sentence)
                if not match:
                    continue

                # Subject is the text before the predicate, object the text after.
                subject = sentence[: match.start()].strip()
                obj = sentence[match.end() :].strip()
                if len(subject) > 2 and len(obj) > 2:
                    facts.append(
                        ExtractedFact(
                            subject=subject[: self._MAX_SUBJECT_LENGTH],
                            predicate=predicate,
                            object=obj[: self._MAX_OBJECT_LENGTH],
                            confidence=0.6,  # Medium confidence for pattern matching
                            source_text=sentence,
                        )
                    )
                    break  # One fact per sentence
        return facts

    def _extract_from_lesson(
        self,
        lesson: str,
        episode: Episode,
    ) -> list[ExtractedFact]:
        """Extract facts from a lesson learned.

        Every sufficiently long lesson yields a ``lesson_learned`` fact.
        Lessons that start with "when"/"if" (followed by a comma-separated
        action), "always", or "never"/"avoid" yield an additional
        ``requires_action``, ``best_practice`` or ``anti_pattern`` fact.

        Args:
            lesson: The lesson text.
            episode: Episode the lesson belongs to; supplies the task type
                used as subject and the outcome recorded in metadata.

        Returns:
            The facts derived from this lesson (possibly empty).
        """
        facts: list[ExtractedFact] = []
        if not lesson:
            return facts

        # The prefix patterns below are anchored at the start of the string,
        # so surrounding whitespace must be removed before matching.
        text = lesson.strip()
        if len(text) < self._MIN_TEXT_LENGTH:
            return facts

        # Lessons are typically in the form "Always do X" or "Never do Y"
        # or "When X, do Y"

        # Direct lesson fact
        facts.append(
            ExtractedFact(
                subject=episode.task_type,
                predicate="lesson_learned",
                object=text,
                confidence=0.8,  # High confidence for explicit lessons
                source_text=lesson,
                metadata={"outcome": episode.outcome.value},
            )
        )

        # Extract conditional patterns
        conditional_match = re.match(
            r"(?:when|if)\s+(.+?),\s*(.+)",
            text,
            re.IGNORECASE,
        )
        if conditional_match:
            condition, action = conditional_match.groups()
            facts.append(
                ExtractedFact(
                    subject=condition.strip(),
                    predicate="requires_action",
                    object=action.strip(),
                    confidence=0.7,
                    source_text=lesson,
                )
            )

        # Extract "always/never" patterns
        always_match = re.match(r"(?:always)\s+(.+)", text, re.IGNORECASE)
        if always_match:
            facts.append(
                ExtractedFact(
                    subject=episode.task_type,
                    predicate="best_practice",
                    object=always_match.group(1).strip(),
                    confidence=0.85,
                    source_text=lesson,
                )
            )

        never_match = re.match(r"(?:never|avoid)\s+(.+)", text, re.IGNORECASE)
        if never_match:
            facts.append(
                ExtractedFact(
                    subject=episode.task_type,
                    predicate="anti_pattern",
                    object=never_match.group(1).strip(),
                    confidence=0.85,
                    source_text=lesson,
                )
            )
        return facts

    def _extract_outcome_facts(
        self,
        episode: Episode,
    ) -> list[ExtractedFact]:
        """Extract facts based on episode outcome.

        A successful episode with outcome details yields a
        ``successful_approach`` fact; a failed one yields a
        ``known_failure_mode`` fact. Any other outcome, or missing details,
        yields nothing.
        """
        facts: list[ExtractedFact] = []
        details = episode.outcome_details
        if not details:
            return facts

        if episode.outcome == Outcome.SUCCESS:
            predicate, confidence = "successful_approach", 0.75
        elif episode.outcome == Outcome.FAILURE:
            # High confidence for failures
            predicate, confidence = "known_failure_mode", 0.8
        else:
            return facts

        facts.append(
            ExtractedFact(
                subject=episode.task_type,
                predicate=predicate,
                object=details[: self._MAX_OBJECT_LENGTH],
                confidence=confidence,
                source_text=details,
            )
        )
        return facts

    def extract_from_text(
        self,
        text: str,
        context: ExtractionContext | None = None,
    ) -> list[ExtractedFact]:
        """
        Extract facts from arbitrary text.

        Args:
            text: Text to extract from
            context: Optional extraction context; defaults are used if omitted

        Returns:
            List of extracted facts, filtered by confidence and truncated
            to the context's per-source limit
        """
        ctx = context or ExtractionContext()
        return self._filter_and_limit(self._extract_from_text(text), ctx)
# Process-wide extractor, created on first request.
_extractor: FactExtractor | None = None


def get_fact_extractor() -> FactExtractor:
    """Return the shared ``FactExtractor``, building it on the first call."""
    global _extractor
    if _extractor is not None:
        return _extractor
    _extractor = FactExtractor()
    return _extractor