forked from cardosofelipe/fast-next-template
Implements semantic memory with fact storage, retrieval, and verification.

Core functionality:
- SemanticMemory class for fact storage/retrieval
- Fact storage as subject-predicate-object triples
- Duplicate detection with reinforcement
- Semantic search with text-based fallback
- Entity-based retrieval
- Confidence scoring and decay
- Conflict resolution

Supporting modules:
- FactExtractor: Pattern-based fact extraction from episodes
- FactVerifier: Contradiction detection and reliability scoring

Test coverage:
- 47 unit tests covering all modules
- extraction.py: 99% coverage
- verification.py: 95% coverage
- memory.py: 78% coverage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
314 lines
9.4 KiB
Python
314 lines
9.4 KiB
Python
# app/services/memory/semantic/extraction.py
|
|
"""
|
|
Fact Extraction from Episodes.
|
|
|
|
Provides utilities for extracting semantic facts (subject-predicate-object triples)
|
|
from episodic memories and other text sources.
|
|
"""
|
|
|
|
import logging
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, ClassVar
|
|
|
|
from app.services.memory.types import Episode, FactCreate, Outcome
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class ExtractionContext:
|
|
"""Context for fact extraction."""
|
|
|
|
project_id: Any | None = None
|
|
source_episode_id: Any | None = None
|
|
min_confidence: float = 0.5
|
|
max_facts_per_source: int = 10
|
|
|
|
|
|
@dataclass
class ExtractedFact:
    """
    A subject-predicate-object triple pulled out of text, not yet stored.

    Use :meth:`to_fact_create` to turn it into the ``FactCreate`` payload.
    """

    # The three parts of the triple.
    subject: str
    predicate: str
    object: str

    # Score attached by the extractor that produced this fact.
    confidence: float

    # The text fragment the fact was derived from.
    source_text: str = ""

    # Free-form extra information; each instance gets its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_fact_create(
        self,
        project_id: Any | None = None,
        source_episode_ids: list[Any] | None = None,
    ) -> FactCreate:
        """
        Build the ``FactCreate`` payload for this fact.

        Args:
            project_id: Project identifier to attach, if any
            source_episode_ids: Episode identifiers to attach; a missing or
                empty value becomes an empty list

        Returns:
            FactCreate carrying the triple, its confidence, the project id
            and the episode ids. ``source_text`` and ``metadata`` are not
            forwarded.
        """
        episode_ids = source_episode_ids or []
        return FactCreate(
            subject=self.subject,
            predicate=self.predicate,
            object=self.object,
            confidence=self.confidence,
            project_id=project_id,
            source_episode_ids=episode_ids,
        )
|
|
|
|
|
|
class FactExtractor:
    """
    Extracts facts from episodes and text.

    This is a rule-based extractor. In production, this would be
    replaced or augmented with LLM-based extraction for better accuracy.

    Predicate phrases are matched as whole words only, so a phrase such as
    "uses" is not detected inside a longer word such as "causes".
    """

    # Common predicates we can detect. Keys are the canonical predicate
    # names stored on the fact; values are regex fragments for the phrases
    # that signal them. Dict order is the matching priority.
    PREDICATE_PATTERNS: ClassVar[dict[str, str]] = {
        "uses": r"(?:uses?|using|utilizes?)",
        "requires": r"(?:requires?|needs?|depends?\s+on)",
        "is_a": r"(?:is\s+a|is\s+an|are\s+a|are)",
        "has": r"(?:has|have|contains?)",
        "part_of": r"(?:part\s+of|belongs?\s+to|member\s+of)",
        "causes": r"(?:causes?|leads?\s+to|results?\s+in)",
        "prevents": r"(?:prevents?|avoids?|stops?)",
        "solves": r"(?:solves?|fixes?|resolves?)",
    }

    # Lesson shapes, anchored at the start of the lesson via ``.match``.
    _CONDITIONAL_LESSON: ClassVar[re.Pattern[str]] = re.compile(
        r"(?:when|if)\s+(.+?),\s*(.+)", re.IGNORECASE
    )
    _ALWAYS_LESSON: ClassVar[re.Pattern[str]] = re.compile(
        r"(?:always)\s+(.+)", re.IGNORECASE
    )
    _NEVER_LESSON: ClassVar[re.Pattern[str]] = re.compile(
        r"(?:never|avoid)\s+(.+)", re.IGNORECASE
    )

    def __init__(self) -> None:
        """Compile the predicate patterns once per extractor instance."""
        # Wrap every pattern in word boundaries. Without them "uses?" fires
        # inside "causes", "has" inside "phase", "are" inside "software",
        # and "is a" wins over "is an" (leaving a stray "n" on the object).
        self._compiled_patterns = {
            pred: re.compile(rf"\b(?:{pattern})\b", re.IGNORECASE)
            for pred, pattern in self.PREDICATE_PATTERNS.items()
        }

    @staticmethod
    def _filter_and_limit(
        facts: list[ExtractedFact],
        ctx: ExtractionContext,
    ) -> list[ExtractedFact]:
        """Drop facts below ``ctx.min_confidence``, then cap the list length."""
        confident = [f for f in facts if f.confidence >= ctx.min_confidence]
        return confident[: ctx.max_facts_per_source]

    def extract_from_episode(
        self,
        episode: Episode,
        context: ExtractionContext | None = None,
    ) -> list[ExtractedFact]:
        """
        Extract facts from an episode.

        Facts are gathered, in this order, from the task description, each
        lesson learned, and the outcome details. The combined list is then
        filtered by minimum confidence and truncated, so when the cap is
        hit the earliest sources win.

        Args:
            episode: Episode to extract from
            context: Optional extraction context; defaults are used if omitted

        Returns:
            List of extracted facts
        """
        ctx = context or ExtractionContext()
        facts: list[ExtractedFact] = []

        # Extract from task description
        facts.extend(
            self._extract_from_text(
                episode.task_description,
                source_prefix=episode.task_type,
            )
        )

        # Extract from lessons learned
        for lesson in episode.lessons_learned:
            facts.extend(self._extract_from_lesson(lesson, episode))

        # Extract outcome-based facts
        facts.extend(self._extract_outcome_facts(episode))

        # Limit and filter
        facts = self._filter_and_limit(facts, ctx)

        logger.debug(f"Extracted {len(facts)} facts from episode {episode.id}")

        return facts

    def _extract_from_text(
        self,
        text: str,
        source_prefix: str = "",
    ) -> list[ExtractedFact]:
        """
        Extract facts from free-form text using pattern matching.

        The text is split into sentences on runs of ``.``, ``!`` and ``?``.
        For each sentence of at least 10 characters, the predicate patterns
        are tried in ``PREDICATE_PATTERNS`` order; the first one that
        matches splits the sentence into subject (before the match) and
        object (after it). At most one fact is produced per sentence.

        Args:
            text: Text to scan; empty or shorter than 10 characters yields
                no facts
            source_prefix: Accepted for call-site compatibility; not used
                by the current implementation

        Returns:
            List of extracted facts, each with confidence 0.6
        """
        facts: list[ExtractedFact] = []

        if not text or len(text) < 10:
            return facts

        # Split into sentences. Note this also splits on periods inside
        # tokens such as version numbers or abbreviations.
        for raw_sentence in re.split(r"[.!?]+", text):
            sentence = raw_sentence.strip()
            if len(sentence) < 10:
                continue

            # Try to match predicate patterns
            for predicate, pattern in self._compiled_patterns.items():
                match = pattern.search(sentence)
                if not match:
                    continue

                # Subject is the text before the predicate, object the
                # text after it.
                subject = sentence[: match.start()].strip()
                obj = sentence[match.end() :].strip()

                if len(subject) > 2 and len(obj) > 2:
                    facts.append(
                        ExtractedFact(
                            subject=subject[:200],  # Limit length
                            predicate=predicate,
                            object=obj[:500],
                            confidence=0.6,  # Medium confidence for pattern matching
                            source_text=sentence,
                        )
                    )
                    break  # One fact per sentence

        return facts

    def _extract_from_lesson(
        self,
        lesson: str,
        episode: Episode,
    ) -> list[ExtractedFact]:
        """
        Extract facts from a lesson learned.

        Every lesson of at least 10 characters yields a ``lesson_learned``
        fact. In addition, lessons that start with "when"/"if" and contain
        a comma yield a ``requires_action`` fact, lessons that start with
        "always" yield a ``best_practice`` fact, and lessons that start
        with "never"/"avoid" yield an ``anti_pattern`` fact.

        Args:
            lesson: Lesson text
            episode: Episode the lesson belongs to; supplies the task type
                used as subject and the outcome stored in metadata

        Returns:
            List of extracted facts (empty for a missing or too-short lesson)
        """
        facts: list[ExtractedFact] = []

        if not lesson or len(lesson) < 10:
            return facts

        # Direct lesson fact
        facts.append(
            ExtractedFact(
                subject=episode.task_type,
                predicate="lesson_learned",
                object=lesson,
                confidence=0.8,  # High confidence for explicit lessons
                source_text=lesson,
                metadata={"outcome": episode.outcome.value},
            )
        )

        # "When X, do Y" / "If X, do Y"
        conditional_match = self._CONDITIONAL_LESSON.match(lesson)
        if conditional_match:
            condition, action = conditional_match.groups()
            facts.append(
                ExtractedFact(
                    subject=condition.strip(),
                    predicate="requires_action",
                    object=action.strip(),
                    confidence=0.7,
                    source_text=lesson,
                )
            )

        # "Always do X"
        always_match = self._ALWAYS_LESSON.match(lesson)
        if always_match:
            facts.append(
                ExtractedFact(
                    subject=episode.task_type,
                    predicate="best_practice",
                    object=always_match.group(1).strip(),
                    confidence=0.85,
                    source_text=lesson,
                )
            )

        # "Never do Y" / "Avoid Y"
        never_match = self._NEVER_LESSON.match(lesson)
        if never_match:
            facts.append(
                ExtractedFact(
                    subject=episode.task_type,
                    predicate="anti_pattern",
                    object=never_match.group(1).strip(),
                    confidence=0.85,
                    source_text=lesson,
                )
            )

        return facts

    def _extract_outcome_facts(
        self,
        episode: Episode,
    ) -> list[ExtractedFact]:
        """
        Extract facts based on episode outcome.

        A successful episode with outcome details yields a
        ``successful_approach`` fact; a failed one yields a
        ``known_failure_mode`` fact. Any other outcome, or missing
        details, yields nothing.

        Args:
            episode: Episode whose outcome and outcome details are read

        Returns:
            List with zero or one extracted fact
        """
        if episode.outcome == Outcome.SUCCESS:
            predicate, confidence = "successful_approach", 0.75
        elif episode.outcome == Outcome.FAILURE:
            # High confidence for failures
            predicate, confidence = "known_failure_mode", 0.8
        else:
            return []

        details = episode.outcome_details
        if not details:
            return []

        return [
            ExtractedFact(
                subject=episode.task_type,
                predicate=predicate,
                object=details[:500],
                confidence=confidence,
                source_text=details,
            )
        ]

    def extract_from_text(
        self,
        text: str,
        context: ExtractionContext | None = None,
    ) -> list[ExtractedFact]:
        """
        Extract facts from arbitrary text.

        Args:
            text: Text to extract from
            context: Optional extraction context; defaults are used if omitted

        Returns:
            List of extracted facts, filtered by the context's minimum
            confidence and capped at its per-source maximum
        """
        ctx = context or ExtractionContext()
        return self._filter_and_limit(self._extract_from_text(text), ctx)
|
|
|
|
|
|
# Lazily created, process-wide extractor; stays None until first requested.
_extractor: FactExtractor | None = None


def get_fact_extractor() -> FactExtractor:
    """
    Return the shared FactExtractor, creating it on first use.

    Returns:
        The module-level extractor instance; the same object is returned
        on every call.
    """
    global _extractor
    if _extractor is not None:
        return _extractor
    _extractor = FactExtractor()
    return _extractor
|