forked from cardosofelipe/fast-next-template
feat(memory): add semantic memory implementation (Issue #91)
Implements semantic memory with fact storage, retrieval, and verification.

Core functionality:
- SemanticMemory class for fact storage/retrieval
- Fact storage as subject-predicate-object triples
- Duplicate detection with reinforcement
- Semantic search with text-based fallback
- Entity-based retrieval
- Confidence scoring and decay
- Conflict resolution

Supporting modules:
- FactExtractor: Pattern-based fact extraction from episodes
- FactVerifier: Contradiction detection and reliability scoring

Test coverage:
- 47 unit tests covering all modules
- extraction.py: 99% coverage
- verification.py: 95% coverage
- memory.py: 78% coverage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
313
backend/app/services/memory/semantic/extraction.py
Normal file
313
backend/app/services/memory/semantic/extraction.py
Normal file
@@ -0,0 +1,313 @@
|
||||
# app/services/memory/semantic/extraction.py
|
||||
"""
|
||||
Fact Extraction from Episodes.
|
||||
|
||||
Provides utilities for extracting semantic facts (subject-predicate-object triples)
|
||||
from episodic memories and other text sources.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, ClassVar
|
||||
|
||||
from app.services.memory.types import Episode, FactCreate, Outcome
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExtractionContext:
|
||||
"""Context for fact extraction."""
|
||||
|
||||
project_id: Any | None = None
|
||||
source_episode_id: Any | None = None
|
||||
min_confidence: float = 0.5
|
||||
max_facts_per_source: int = 10
|
||||
|
||||
|
||||
@dataclass
class ExtractedFact:
    """A subject-predicate-object triple pulled from text, not yet stored.

    ``source_text`` and ``metadata`` describe where the fact came from; they
    are kept on this object only and are not forwarded by
    :meth:`to_fact_create`.
    """

    subject: str
    predicate: str
    object: str
    confidence: float
    source_text: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_fact_create(
        self,
        project_id: Any | None = None,
        source_episode_ids: list[Any] | None = None,
    ) -> FactCreate:
        """Build the ``FactCreate`` payload used to store this fact.

        Args:
            project_id: Project identifier passed through unchanged.
            source_episode_ids: Episode identifiers to attach; ``None`` (or an
                empty list) results in an empty list.

        Returns:
            A ``FactCreate`` carrying the triple, its confidence, the project
            id and the source episode ids.
        """
        episode_ids = source_episode_ids if source_episode_ids else []
        payload = {
            "subject": self.subject,
            "predicate": self.predicate,
            "object": self.object,
            "confidence": self.confidence,
            "project_id": project_id,
            "source_episode_ids": episode_ids,
        }
        return FactCreate(**payload)
|
||||
|
||||
|
||||
class FactExtractor:
    """
    Extracts facts from episodes and text.

    This is a rule-based extractor. In production, this would be
    replaced or augmented with LLM-based extraction for better accuracy.

    Predicate keywords are matched case-insensitively and as whole words, so
    a keyword embedded in a longer word ("uses" inside "causes", "has" inside
    "hashes", "is a" inside "is an") never produces a fact.
    """

    # Common predicates we can detect. Each value is a regex fragment; word
    # boundaries are added when the patterns are compiled in ``__init__``.
    PREDICATE_PATTERNS: ClassVar[dict[str, str]] = {
        "uses": r"(?:uses?|using|utilizes?)",
        "requires": r"(?:requires?|needs?|depends?\s+on)",
        "is_a": r"(?:is\s+a|is\s+an|are\s+a|are)",
        "has": r"(?:has|have|contains?)",
        "part_of": r"(?:part\s+of|belongs?\s+to|member\s+of)",
        "causes": r"(?:causes?|leads?\s+to|results?\s+in)",
        "prevents": r"(?:prevents?|avoids?|stops?)",
        # "fix(?:es)?" rather than "fixes?": the latter only matches
        # "fixe"/"fixes" and never the bare verb "fix".
        "solves": r"(?:solves?|fix(?:es)?|resolves?)",
    }

    # Length limits applied to extracted free text.
    _MAX_SUBJECT_LEN: ClassVar[int] = 200
    _MAX_OBJECT_LEN: ClassVar[int] = 500
    # Texts, sentences and lessons shorter than this are ignored.
    _MIN_TEXT_LEN: ClassVar[int] = 10

    # Compiled once at class creation instead of on every call.
    _SENTENCE_SPLIT: ClassVar[re.Pattern[str]] = re.compile(r"[.!?]+")
    _CONDITIONAL_LESSON: ClassVar[re.Pattern[str]] = re.compile(
        r"(?:when|if)\s+(.+?),\s*(.+)", re.IGNORECASE
    )
    _ALWAYS_LESSON: ClassVar[re.Pattern[str]] = re.compile(
        r"(?:always)\s+(.+)", re.IGNORECASE
    )
    _NEVER_LESSON: ClassVar[re.Pattern[str]] = re.compile(
        r"(?:never|avoid)\s+(.+)", re.IGNORECASE
    )

    def __init__(self) -> None:
        """Initialize extractor by compiling the predicate patterns."""
        # Wrap every fragment in \b...\b so keywords only match whole words.
        # The extra non-capturing group keeps this correct even for a
        # fragment that is a bare top-level alternation.
        self._compiled_patterns = {
            pred: re.compile(rf"\b(?:{pattern})\b", re.IGNORECASE)
            for pred, pattern in self.PREDICATE_PATTERNS.items()
        }

    @staticmethod
    def _filter_and_limit(
        facts: list[ExtractedFact],
        ctx: ExtractionContext,
    ) -> list[ExtractedFact]:
        """Drop facts below ``ctx.min_confidence`` and cap the result size."""
        kept = [f for f in facts if f.confidence >= ctx.min_confidence]
        # Clamp at zero: a negative limit would otherwise act as a negative
        # slice index and silently drop facts from the end of the list.
        return kept[: max(ctx.max_facts_per_source, 0)]

    def extract_from_episode(
        self,
        episode: Episode,
        context: ExtractionContext | None = None,
    ) -> list[ExtractedFact]:
        """
        Extract facts from an episode.

        Facts are collected from the task description, then from each lesson
        learned, then from the outcome; that order is kept in the result.
        Low-confidence facts are removed before the per-source limit is
        applied.

        Args:
            episode: Episode to extract from
            context: Optional extraction context (defaults are used if omitted)

        Returns:
            List of extracted facts
        """
        ctx = context or ExtractionContext()
        facts: list[ExtractedFact] = []

        # Extract from task description
        facts.extend(
            self._extract_from_text(
                episode.task_description,
                source_prefix=episode.task_type,
            )
        )

        # Extract from lessons learned
        for lesson in episode.lessons_learned:
            facts.extend(self._extract_from_lesson(lesson, episode))

        # Extract outcome-based facts
        facts.extend(self._extract_outcome_facts(episode))

        # Limit and filter
        facts = self._filter_and_limit(facts, ctx)

        # Lazy %-formatting: the message is only built if DEBUG is enabled.
        logger.debug("Extracted %d facts from episode %s", len(facts), episode.id)

        return facts

    def _extract_from_text(
        self,
        text: str,
        source_prefix: str = "",
    ) -> list[ExtractedFact]:
        """Extract facts from free-form text using pattern matching.

        The text is split on runs of ``.``, ``!`` and ``?``. For each sentence
        the predicates are tried in ``PREDICATE_PATTERNS`` order and the first
        one that yields a usable subject and object produces the sentence's
        single fact.

        Args:
            text: Text to scan; empty or very short text yields no facts.
            source_prefix: Accepted for interface compatibility; currently
                unused.

        Returns:
            Extracted facts in sentence order, each with confidence 0.6.
        """
        facts: list[ExtractedFact] = []

        if not text or len(text) < self._MIN_TEXT_LEN:
            return facts

        for raw_sentence in self._SENTENCE_SPLIT.split(text):
            sentence = raw_sentence.strip()
            if len(sentence) < self._MIN_TEXT_LEN:
                continue

            # Try to match predicate patterns
            for predicate, pattern in self._compiled_patterns.items():
                match = pattern.search(sentence)
                if not match:
                    continue

                # Subject is the text before the keyword, object the text after
                subject = sentence[: match.start()].strip()
                obj = sentence[match.end() :].strip()

                if len(subject) > 2 and len(obj) > 2:
                    facts.append(
                        ExtractedFact(
                            subject=subject[: self._MAX_SUBJECT_LEN],
                            predicate=predicate,
                            object=obj[: self._MAX_OBJECT_LEN],
                            confidence=0.6,  # Medium confidence for pattern matching
                            source_text=sentence,
                        )
                    )
                    break  # One fact per sentence

        return facts

    def _extract_from_lesson(
        self,
        lesson: str,
        episode: Episode,
    ) -> list[ExtractedFact]:
        """Extract facts from a lesson learned.

        Every usable lesson yields a ``lesson_learned`` fact. Lessons that
        start with "when"/"if" and contain a comma additionally yield a
        ``requires_action`` fact; lessons starting with "always" yield a
        ``best_practice`` fact; lessons starting with "never"/"avoid" yield an
        ``anti_pattern`` fact.

        Args:
            lesson: Lesson text; surrounding whitespace is ignored.
            episode: Episode the lesson belongs to (supplies the task type and
                the outcome recorded in the fact metadata).

        Returns:
            Facts derived from the lesson, or an empty list if the lesson is
            empty or shorter than the minimum text length.
        """
        facts: list[ExtractedFact] = []

        # Strip first so leading whitespace cannot defeat the anchored
        # "when/always/never" patterns below.
        text = lesson.strip() if lesson else ""
        if len(text) < self._MIN_TEXT_LEN:
            return facts

        subject_limit = self._MAX_SUBJECT_LEN
        object_limit = self._MAX_OBJECT_LEN

        # Direct lesson fact. Free text is capped like in the other
        # extraction paths.
        facts.append(
            ExtractedFact(
                subject=episode.task_type,
                predicate="lesson_learned",
                object=text[:object_limit],
                confidence=0.8,  # High confidence for explicit lessons
                source_text=lesson,
                metadata={"outcome": episode.outcome.value},
            )
        )

        # Conditional lessons: "When X, do Y" / "If X, do Y"
        conditional_match = self._CONDITIONAL_LESSON.match(text)
        if conditional_match:
            condition, action = (
                part.strip() for part in conditional_match.groups()
            )
            # Either side can collapse to "" after stripping; skip those.
            if condition and action:
                facts.append(
                    ExtractedFact(
                        subject=condition[:subject_limit],
                        predicate="requires_action",
                        object=action[:object_limit],
                        confidence=0.7,
                        source_text=lesson,
                    )
                )

        # "Always do X" -> best practice
        always_match = self._ALWAYS_LESSON.match(text)
        if always_match:
            facts.append(
                ExtractedFact(
                    subject=episode.task_type,
                    predicate="best_practice",
                    object=always_match.group(1).strip()[:object_limit],
                    confidence=0.85,
                    source_text=lesson,
                )
            )

        # "Never do Y" / "Avoid Y" -> anti-pattern
        never_match = self._NEVER_LESSON.match(text)
        if never_match:
            facts.append(
                ExtractedFact(
                    subject=episode.task_type,
                    predicate="anti_pattern",
                    object=never_match.group(1).strip()[:object_limit],
                    confidence=0.85,
                    source_text=lesson,
                )
            )

        return facts

    def _extract_outcome_facts(
        self,
        episode: Episode,
    ) -> list[ExtractedFact]:
        """Extract facts based on episode outcome.

        A successful episode with outcome details yields a
        ``successful_approach`` fact (confidence 0.75); a failed one yields a
        ``known_failure_mode`` fact (confidence 0.8). Any other outcome, or
        missing outcome details, yields nothing.
        """
        details = episode.outcome_details
        if not details:
            return []

        # Equality (not a dict lookup) is used on purpose so the comparison
        # semantics of the Outcome type stay exactly as before.
        if episode.outcome == Outcome.SUCCESS:
            predicate, confidence = "successful_approach", 0.75
        elif episode.outcome == Outcome.FAILURE:
            # High confidence for failures
            predicate, confidence = "known_failure_mode", 0.8
        else:
            return []

        return [
            ExtractedFact(
                subject=episode.task_type,
                predicate=predicate,
                object=details[: self._MAX_OBJECT_LEN],
                confidence=confidence,
                source_text=details,
            )
        ]

    def extract_from_text(
        self,
        text: str,
        context: ExtractionContext | None = None,
    ) -> list[ExtractedFact]:
        """
        Extract facts from arbitrary text.

        Args:
            text: Text to extract from
            context: Optional extraction context (defaults are used if omitted)

        Returns:
            List of extracted facts, filtered by the context's minimum
            confidence and capped at its per-source limit
        """
        ctx = context or ExtractionContext()
        return self._filter_and_limit(self._extract_from_text(text), ctx)
|
||||
|
||||
|
||||
# Module-wide extractor, created on first request and reused afterwards.
_extractor: FactExtractor | None = None


def get_fact_extractor() -> FactExtractor:
    """Return the shared ``FactExtractor``, creating it on first use."""
    global _extractor
    instance = _extractor
    if instance is None:
        # First call: build the extractor and remember it for later calls.
        instance = _extractor = FactExtractor()
    return instance
|
||||
Reference in New Issue
Block a user