# tests/unit/services/memory/semantic/test_extraction.py
"""Unit tests for fact extraction."""

from datetime import UTC, datetime
from uuid import uuid4

import pytest

from app.services.memory.semantic.extraction import (
    ExtractedFact,
    ExtractionContext,
    FactExtractor,
    get_fact_extractor,
)
from app.services.memory.types import Episode, Outcome


def create_test_episode(
    lessons_learned: list[str] | None = None,
    outcome: Outcome = Outcome.SUCCESS,
    task_type: str = "code_review",
    task_description: str = "Review the authentication module",
    outcome_details: str = "",
) -> Episode:
    """Create a test episode for extraction tests.

    Only the fields exposed as parameters vary between tests; every other
    field is filled with a fixed placeholder value.

    Args:
        lessons_learned: Lessons attached to the episode; ``None`` (or an
            empty list) yields an episode without lessons.
        outcome: Outcome recorded on the episode.
        task_type: Task type label.
        task_description: Free-text description of the task.
        outcome_details: Free-text details about the outcome.

    Returns:
        A fully populated ``Episode`` with fresh random ids.
    """
    # Capture the clock once so the three timestamps are identical rather
    # than a few microseconds apart.
    now = datetime.now(UTC)
    return Episode(
        id=uuid4(),
        project_id=uuid4(),
        agent_instance_id=None,
        agent_type_id=None,
        session_id="test-session",
        task_type=task_type,
        task_description=task_description,
        actions=[],
        context_summary="Test context",
        outcome=outcome,
        outcome_details=outcome_details,
        duration_seconds=60.0,
        tokens_used=500,
        lessons_learned=lessons_learned or [],
        importance_score=0.7,
        embedding=None,
        occurred_at=now,
        created_at=now,
        updated_at=now,
    )


class TestExtractedFact:
    """Tests for ExtractedFact dataclass."""

    def test_to_fact_create(self) -> None:
        """Test converting ExtractedFact to FactCreate."""
        project_id = uuid4()
        source_episode_ids = [uuid4()]
        extracted = ExtractedFact(
            subject="Python",
            predicate="uses",
            object="dynamic typing",
            confidence=0.8,
        )

        fact_create = extracted.to_fact_create(
            project_id=project_id,
            source_episode_ids=source_episode_ids,
        )

        assert fact_create.subject == "Python"
        assert fact_create.predicate == "uses"
        assert fact_create.object == "dynamic typing"
        assert fact_create.confidence == 0.8
        # The explicitly supplied ids must reach the result; the defaults
        # test below only covers the case where they are omitted.
        assert fact_create.project_id == project_id
        assert fact_create.source_episode_ids == source_episode_ids

    def test_to_fact_create_defaults(self) -> None:
        """Test to_fact_create with default values."""
        extracted = ExtractedFact(
            subject="A",
            predicate="B",
            object="C",
            confidence=0.5,
        )

        fact_create = extracted.to_fact_create()

        assert fact_create.project_id is None
        assert fact_create.source_episode_ids == []


class TestFactExtractor:
    """Tests for FactExtractor class."""

    @pytest.fixture
    def extractor(self) -> FactExtractor:
        """Create a fact extractor."""
        return FactExtractor()

    def test_extract_from_episode_with_lessons(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting facts from episode with lessons."""
        episode = create_test_episode(
            lessons_learned=[
                "Always validate user input before processing",
                "Use parameterized queries to prevent SQL injection",
            ]
        )

        facts = extractor.extract_from_episode(episode)

        assert len(facts) > 0
        # Should have lesson_learned predicates
        lesson_facts = [f for f in facts if f.predicate == "lesson_learned"]
        assert len(lesson_facts) >= 2

    def test_extract_from_episode_with_always_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting 'always' pattern lessons."""
        episode = create_test_episode(
            lessons_learned=["Always close file handles properly"]
        )

        facts = extractor.extract_from_episode(episode)

        best_practices = [f for f in facts if f.predicate == "best_practice"]
        assert len(best_practices) >= 1
        assert any("close file handles" in f.object for f in best_practices)

    def test_extract_from_episode_with_never_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting 'never' pattern lessons."""
        episode = create_test_episode(
            lessons_learned=["Never store passwords in plain text"]
        )

        facts = extractor.extract_from_episode(episode)

        anti_patterns = [f for f in facts if f.predicate == "anti_pattern"]
        assert len(anti_patterns) >= 1

    def test_extract_from_episode_with_conditional_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting conditional lessons."""
        episode = create_test_episode(
            lessons_learned=["When handling errors, log the stack trace"]
        )

        facts = extractor.extract_from_episode(episode)

        conditional = [f for f in facts if f.predicate == "requires_action"]
        assert len(conditional) >= 1

    def test_extract_outcome_facts_success(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting facts from successful episode."""
        episode = create_test_episode(
            outcome=Outcome.SUCCESS,
            outcome_details="Deployed to production without issues",
        )

        facts = extractor.extract_from_episode(episode)

        success_facts = [f for f in facts if f.predicate == "successful_approach"]
        assert len(success_facts) >= 1

    def test_extract_outcome_facts_failure(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting facts from failed episode."""
        episode = create_test_episode(
            outcome=Outcome.FAILURE,
            outcome_details="Connection timeout during deployment",
        )

        facts = extractor.extract_from_episode(episode)

        failure_facts = [f for f in facts if f.predicate == "known_failure_mode"]
        assert len(failure_facts) >= 1

    def test_extract_from_text_uses_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting 'uses' pattern from text."""
        text = "FastAPI uses Starlette for ASGI support."

        facts = extractor.extract_from_text(text)

        assert len(facts) >= 1
        uses_facts = [f for f in facts if f.predicate == "uses"]
        assert len(uses_facts) >= 1

    def test_extract_from_text_requires_pattern(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting 'requires' pattern from text."""
        text = "This feature requires Python 3.10 or higher."

        facts = extractor.extract_from_text(text)

        requires_facts = [f for f in facts if f.predicate == "requires"]
        assert len(requires_facts) >= 1

    def test_extract_from_text_empty(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting from empty text."""
        facts = extractor.extract_from_text("")

        assert facts == []

    def test_extract_from_text_short(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extracting from too-short text."""
        facts = extractor.extract_from_text("Hi.")

        assert facts == []

    def test_extract_with_context(
        self,
        extractor: FactExtractor,
    ) -> None:
        """Test extraction with custom context."""
        min_confidence = 0.9  # High threshold
        max_facts = 2
        episode = create_test_episode(lessons_learned=["Low confidence lesson"])
        context = ExtractionContext(
            min_confidence=min_confidence,
            max_facts_per_source=max_facts,
        )

        facts = extractor.extract_from_episode(episode, context)

        # The two limits are checked independently: joined with `or`, a
        # short result list would mask facts below the confidence threshold.
        # NOTE(review): assumes both limits apply to the list returned for a
        # single episode - confirm against FactExtractor.extract_from_episode.
        assert len(facts) <= max_facts
        # Should filter out low confidence facts
        assert all(fact.confidence >= min_confidence for fact in facts)


class TestGetFactExtractor:
    """Tests for singleton getter."""

    def test_get_fact_extractor_returns_instance(self) -> None:
        """Test that get_fact_extractor returns an instance."""
        extractor = get_fact_extractor()

        assert extractor is not None
        assert isinstance(extractor, FactExtractor)

    def test_get_fact_extractor_returns_same_instance(self) -> None:
        """Test that get_fact_extractor returns singleton."""
        extractor1 = get_fact_extractor()
        extractor2 = get_fact_extractor()

        assert extractor1 is extractor2