Files
syndarix/backend/tests/unit/services/memory/semantic/test_extraction.py
Felipe Cardoso e946787a61 feat(memory): add semantic memory implementation (Issue #91)
Implements semantic memory with fact storage, retrieval, and verification:

Core functionality:
- SemanticMemory class for fact storage/retrieval
- Fact storage as subject-predicate-object triples
- Duplicate detection with reinforcement
- Semantic search with text-based fallback
- Entity-based retrieval
- Confidence scoring and decay
- Conflict resolution

Supporting modules:
- FactExtractor: Pattern-based fact extraction from episodes
- FactVerifier: Contradiction detection and reliability scoring

Test coverage:
- 47 unit tests covering all modules
- extraction.py: 99% coverage
- verification.py: 95% coverage
- memory.py: 78% coverage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 02:23:06 +01:00

264 lines
8.0 KiB
Python

# tests/unit/services/memory/semantic/test_extraction.py
"""Unit tests for fact extraction."""
from datetime import UTC, datetime
from uuid import uuid4
import pytest
from app.services.memory.semantic.extraction import (
ExtractedFact,
ExtractionContext,
FactExtractor,
get_fact_extractor,
)
from app.services.memory.types import Episode, Outcome
def create_test_episode(
    lessons_learned: list[str] | None = None,
    outcome: Outcome = Outcome.SUCCESS,
    task_type: str = "code_review",
    task_description: str = "Review the authentication module",
    outcome_details: str = "",
) -> Episode:
    """Build an ``Episode`` with placeholder values for the extraction tests.

    Only the fields the tests vary are exposed as parameters; every other
    field is filled with a constant (or a freshly generated UUID/timestamp).

    Args:
        lessons_learned: Lessons to attach; ``None`` or an empty list both
            result in an empty list on the episode.
        outcome: Outcome recorded on the episode.
        task_type: Task type recorded on the episode.
        task_description: Task description recorded on the episode.
        outcome_details: Free-text outcome details recorded on the episode.

    Returns:
        The constructed ``Episode``.
    """
    lessons = lessons_learned if lessons_learned else []
    episode_fields = {
        "id": uuid4(),
        "project_id": uuid4(),
        "agent_instance_id": None,
        "agent_type_id": None,
        "session_id": "test-session",
        "task_type": task_type,
        "task_description": task_description,
        "actions": [],
        "context_summary": "Test context",
        "outcome": outcome,
        "outcome_details": outcome_details,
        "duration_seconds": 60.0,
        "tokens_used": 500,
        "lessons_learned": lessons,
        "importance_score": 0.7,
        "embedding": None,
        # Each timestamp is sampled independently, exactly as callers expect.
        "occurred_at": datetime.now(UTC),
        "created_at": datetime.now(UTC),
        "updated_at": datetime.now(UTC),
    }
    return Episode(**episode_fields)
class TestExtractedFact:
    """Checks for ``ExtractedFact.to_fact_create``."""

    def test_to_fact_create(self) -> None:
        """The triple and confidence carry over to the created fact."""
        expected = {
            "subject": "Python",
            "predicate": "uses",
            "object": "dynamic typing",
            "confidence": 0.8,
        }
        source = ExtractedFact(**expected)

        created = source.to_fact_create(
            project_id=uuid4(),
            source_episode_ids=[uuid4()],
        )

        # Compare field by field so a failure names the offending attribute.
        for attribute, value in expected.items():
            assert getattr(created, attribute) == value

    def test_to_fact_create_defaults(self) -> None:
        """Calling without arguments leaves project and sources unset."""
        source = ExtractedFact(
            subject="A",
            predicate="B",
            object="C",
            confidence=0.5,
        )

        created = source.to_fact_create()

        assert created.project_id is None
        assert created.source_episode_ids == []
class TestFactExtractor:
    """Tests for FactExtractor class."""

    @pytest.fixture
    def extractor(self) -> FactExtractor:
        """Provide a fresh ``FactExtractor`` for each test."""
        return FactExtractor()

    @staticmethod
    def _with_predicate(facts: list, predicate: str) -> list:
        """Return the facts whose ``predicate`` equals *predicate*."""
        return [fact for fact in facts if fact.predicate == predicate]

    def test_extract_from_episode_with_lessons(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Two lessons yield at least two ``lesson_learned`` facts."""
        episode = create_test_episode(
            lessons_learned=[
                "Always validate user input before processing",
                "Use parameterized queries to prevent SQL injection",
            ]
        )

        facts = extractor.extract_from_episode(episode)

        assert len(facts) > 0
        # Should have lesson_learned predicates
        lesson_facts = self._with_predicate(facts, "lesson_learned")
        assert len(lesson_facts) >= 2

    def test_extract_from_episode_with_always_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """An 'Always ...' lesson yields a ``best_practice`` fact."""
        episode = create_test_episode(
            lessons_learned=["Always close file handles properly"]
        )

        facts = extractor.extract_from_episode(episode)

        best_practices = self._with_predicate(facts, "best_practice")
        assert len(best_practices) >= 1
        assert any("close file handles" in f.object for f in best_practices)

    def test_extract_from_episode_with_never_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """A 'Never ...' lesson yields an ``anti_pattern`` fact."""
        episode = create_test_episode(
            lessons_learned=["Never store passwords in plain text"]
        )

        facts = extractor.extract_from_episode(episode)

        anti_patterns = self._with_predicate(facts, "anti_pattern")
        assert len(anti_patterns) >= 1

    def test_extract_from_episode_with_conditional_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """A 'When ..., ...' lesson yields a ``requires_action`` fact."""
        episode = create_test_episode(
            lessons_learned=["When handling errors, log the stack trace"]
        )

        facts = extractor.extract_from_episode(episode)

        conditional = self._with_predicate(facts, "requires_action")
        assert len(conditional) >= 1

    def test_extract_outcome_facts_success(
        self,
        extractor: FactExtractor,
    ) -> None:
        """A successful episode yields a ``successful_approach`` fact."""
        episode = create_test_episode(
            outcome=Outcome.SUCCESS,
            outcome_details="Deployed to production without issues",
        )

        facts = extractor.extract_from_episode(episode)

        success_facts = self._with_predicate(facts, "successful_approach")
        assert len(success_facts) >= 1

    def test_extract_outcome_facts_failure(
        self,
        extractor: FactExtractor,
    ) -> None:
        """A failed episode yields a ``known_failure_mode`` fact."""
        episode = create_test_episode(
            outcome=Outcome.FAILURE,
            outcome_details="Connection timeout during deployment",
        )

        facts = extractor.extract_from_episode(episode)

        failure_facts = self._with_predicate(facts, "known_failure_mode")
        assert len(failure_facts) >= 1

    def test_extract_from_text_uses_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """An 'X uses Y' sentence yields a ``uses`` fact."""
        text = "FastAPI uses Starlette for ASGI support."

        facts = extractor.extract_from_text(text)

        assert len(facts) >= 1
        uses_facts = self._with_predicate(facts, "uses")
        assert len(uses_facts) >= 1

    def test_extract_from_text_requires_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """An 'X requires Y' sentence yields a ``requires`` fact."""
        text = "This feature requires Python 3.10 or higher."

        facts = extractor.extract_from_text(text)

        requires_facts = self._with_predicate(facts, "requires")
        assert len(requires_facts) >= 1

    def test_extract_from_text_empty(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Empty text yields no facts."""
        facts = extractor.extract_from_text("")
        assert facts == []

    def test_extract_from_text_short(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Very short text ("Hi.") yields no facts."""
        facts = extractor.extract_from_text("Hi.")
        assert facts == []

    def test_extract_with_context(
        self,
        extractor: FactExtractor,
    ) -> None:
        """A custom context's confidence floor and fact cap are both honoured."""
        min_confidence = 0.9  # High threshold
        max_facts = 2
        episode = create_test_episode(lessons_learned=["Low confidence lesson"])
        context = ExtractionContext(
            min_confidence=min_confidence,
            max_facts_per_source=max_facts,
        )

        facts = extractor.extract_from_episode(episode, context)

        # The two limits are checked independently. OR-ing them (as this test
        # previously did) made the confidence check vacuous whenever the
        # result was short, so an unfiltered low-confidence fact went unseen.
        assert all(fact.confidence >= min_confidence for fact in facts)
        # NOTE(review): assumes the cap applies to the episode as a whole;
        # confirm against FactExtractor if "per source" means per lesson.
        assert len(facts) <= max_facts
class TestGetFactExtractor:
    """Checks for the ``get_fact_extractor`` accessor."""

    def test_get_fact_extractor_returns_instance(self) -> None:
        """The accessor yields a non-None ``FactExtractor``."""
        instance = get_fact_extractor()

        assert instance is not None
        assert isinstance(instance, FactExtractor)

    def test_get_fact_extractor_returns_same_instance(self) -> None:
        """Two consecutive calls yield the very same object."""
        first, second = get_fact_extractor(), get_fact_extractor()

        assert first is second