# tests/unit/services/memory/reflection/test_service.py
"""Tests for Memory Reflection service."""

from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4

import pytest

from app.services.memory.reflection.service import (
    MemoryReflection,
    ReflectionConfig,
    get_memory_reflection,
    reset_memory_reflection,
)
from app.services.memory.reflection.types import (
    AnomalyType,
    FactorType,
    InsightType,
    PatternType,
    TimeRange,
)
from app.services.memory.types import Episode, Outcome

pytestmark = pytest.mark.asyncio(loop_scope="function")


def create_mock_episode(
    task_type: str = "test_task",
    outcome: Outcome = Outcome.SUCCESS,
    duration_seconds: float = 60.0,
    tokens_used: int = 100,
    actions: list | None = None,
    occurred_at: datetime | None = None,
    context_summary: str = "Test context",
) -> Episode:
    """Build an ``Episode`` populated with test defaults.

    Args:
        task_type: Task type; also embedded in the task description.
        outcome: Outcome recorded on the episode.
        duration_seconds: Duration recorded on the episode.
        tokens_used: Token count recorded on the episode.
        actions: Action dicts; a falsy value (``None`` or ``[]``) is replaced
            by a single placeholder action.
        occurred_at: Event timestamp; defaults to the current UTC time.
        context_summary: Context summary text.

    Returns:
        A new ``Episode`` with fresh random ``id`` and ``project_id``.
    """
    return Episode(
        id=uuid4(),
        project_id=uuid4(),
        agent_instance_id=None,
        agent_type_id=None,
        session_id="session-123",
        task_type=task_type,
        task_description=f"Test {task_type}",
        actions=actions or [{"type": "action1", "content": "test"}],
        context_summary=context_summary,
        outcome=outcome,
        outcome_details="",
        duration_seconds=duration_seconds,
        tokens_used=tokens_used,
        lessons_learned=[],
        importance_score=0.5,
        embedding=None,
        occurred_at=occurred_at or datetime.now(UTC),
        created_at=datetime.now(UTC),
        updated_at=datetime.now(UTC),
    )


def _stub_episodic(
    reflection: MemoryReflection,
    episodes: list[Episode],
) -> MagicMock:
    """Install a mock episodic memory whose ``get_recent`` yields ``episodes``.

    The mock is assigned to ``reflection._episodic`` and returned so a test
    can attach further stubs or make call assertions on it.
    """
    episodic = MagicMock()
    episodic.get_recent = AsyncMock(return_value=episodes)
    reflection._episodic = episodic
    return episodic


@pytest.fixture(autouse=True)
async def reset_singleton() -> None:
    """Reset singleton before each test."""
    await reset_memory_reflection()


@pytest.fixture
def mock_session() -> MagicMock:
    """Create mock database session."""
    return MagicMock()


@pytest.fixture
def config() -> ReflectionConfig:
    """Create test configuration with thresholds lowered for small samples."""
    return ReflectionConfig(
        min_pattern_occurrences=2,
        min_pattern_confidence=0.5,
        min_sample_size_for_factor=3,
        min_correlation_for_factor=0.2,
        min_baseline_samples=5,
        anomaly_std_dev_threshold=2.0,
        min_insight_confidence=0.1,  # Lower for testing
    )


@pytest.fixture
def reflection(
    mock_session: MagicMock,
    config: ReflectionConfig,
) -> MemoryReflection:
    """Create reflection service wired to the mock session and test config."""
    return MemoryReflection(session=mock_session, config=config)


class TestReflectionConfig:
    """Tests for ReflectionConfig."""

    def test_default_values(self) -> None:
        """Should have sensible defaults."""
        config = ReflectionConfig()

        assert config.min_pattern_occurrences == 3
        assert config.min_pattern_confidence == 0.6
        assert config.min_sample_size_for_factor == 5
        assert config.anomaly_std_dev_threshold == 2.0
        assert config.max_episodes_to_analyze == 1000

    def test_custom_values(self) -> None:
        """Should allow custom values."""
        config = ReflectionConfig(
            min_pattern_occurrences=5,
            min_pattern_confidence=0.8,
        )

        assert config.min_pattern_occurrences == 5
        assert config.min_pattern_confidence == 0.8


class TestPatternDetection:
    """Tests for pattern detection."""

    async def test_detect_recurring_success_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect recurring success patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # 8 successes + 2 failures for the same task type, all timestamped
        # within the last few hours so they fall inside the time range.
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                task_type="build",
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(8)
        ] + [
            create_mock_episode(
                task_type="build",
                outcome=Outcome.FAILURE,
                occurred_at=now - timedelta(hours=8 + i),
            )
            for i in range(2)
        ]
        _stub_episodic(reflection, episodes)

        patterns = await reflection.analyze_patterns(project_id, time_range)

        # Should find recurring success pattern for 'build' task
        success_patterns = [
            p for p in patterns if p.pattern_type == PatternType.RECURRING_SUCCESS
        ]
        assert len(success_patterns) >= 1
        assert any("build" in p.name for p in success_patterns)

    async def test_detect_recurring_failure_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect recurring failure patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # 7 failures + 3 successes, all timestamped within the time range.
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                task_type="deploy",
                outcome=Outcome.FAILURE,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(7)
        ] + [
            create_mock_episode(
                task_type="deploy",
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(hours=7 + i),
            )
            for i in range(3)
        ]
        _stub_episodic(reflection, episodes)

        patterns = await reflection.analyze_patterns(project_id, time_range)

        failure_patterns = [
            p for p in patterns if p.pattern_type == PatternType.RECURRING_FAILURE
        ]
        assert len(failure_patterns) >= 1

    async def test_detect_action_sequence_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect action sequence patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Five episodes sharing the same action sequence, all timestamped
        # within the time range.
        now = datetime.now(UTC)
        actions = [
            {"type": "read_file"},
            {"type": "analyze"},
            {"type": "write_file"},
        ]
        episodes = [
            create_mock_episode(
                actions=actions,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(5)
        ]
        _stub_episodic(reflection, episodes)

        patterns = await reflection.analyze_patterns(project_id, time_range)

        action_patterns = [
            p for p in patterns if p.pattern_type == PatternType.ACTION_SEQUENCE
        ]
        assert len(action_patterns) >= 1

    async def test_detect_temporal_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect temporal patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Create episodes concentrated at a specific hour. Anchor to
        # *yesterday* at 10:00 UTC: anchoring to today would produce
        # timestamps in the future whenever the suite runs before ~10:45 UTC,
        # making the fixture data depend on the time of day.
        base_time = (datetime.now(UTC) - timedelta(days=1)).replace(
            hour=10,
            minute=0,
            second=0,
            microsecond=0,
        )
        episodes = [
            create_mock_episode(occurred_at=base_time + timedelta(minutes=i * 5))
            for i in range(10)
        ]
        _stub_episodic(reflection, episodes)

        patterns = await reflection.analyze_patterns(project_id, time_range)

        # May or may not find temporal patterns depending on thresholds
        # Just verify the analysis completes without error
        assert isinstance(patterns, list)

    async def test_empty_episodes_returns_empty(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return empty list when no episodes."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)
        _stub_episodic(reflection, [])

        patterns = await reflection.analyze_patterns(project_id, time_range)

        assert patterns == []


class TestSuccessFactors:
    """Tests for success factor identification."""

    async def test_identify_action_factors(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should identify action-related success factors."""
        project_id = uuid4()

        # Create episodes where 'validate' action correlates with success
        successful = [
            create_mock_episode(
                outcome=Outcome.SUCCESS,
                actions=[{"type": "validate"}, {"type": "commit"}],
            )
            for _ in range(5)
        ]
        failed = [
            create_mock_episode(
                outcome=Outcome.FAILURE,
                actions=[{"type": "commit"}],  # Missing validate
            )
            for _ in range(5)
        ]
        _stub_episodic(reflection, successful + failed)

        factors = await reflection.identify_success_factors(project_id)

        # Whether an ACTION factor is reported depends on thresholds, so this
        # is a smoke test: the analysis must complete and return a list.
        # (A ``len(...) >= 0`` check here could never fail.)
        assert isinstance(factors, list)

    async def test_identify_timing_factors(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should identify timing-related factors."""
        project_id = uuid4()

        # Successful tasks are faster
        successful = [
            create_mock_episode(outcome=Outcome.SUCCESS, duration_seconds=30.0)
            for _ in range(5)
        ]
        # Failed tasks take longer
        failed = [
            create_mock_episode(outcome=Outcome.FAILURE, duration_seconds=120.0)
            for _ in range(5)
        ]
        _stub_episodic(reflection, successful + failed)

        factors = await reflection.identify_success_factors(project_id)

        timing_factors = [f for f in factors if f.factor_type == FactorType.TIMING]
        assert len(timing_factors) >= 1

    async def test_identify_resource_factors(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should identify resource usage factors."""
        project_id = uuid4()

        # Successful tasks use fewer tokens
        successful = [
            create_mock_episode(outcome=Outcome.SUCCESS, tokens_used=100)
            for _ in range(5)
        ]
        # Failed tasks use more tokens
        failed = [
            create_mock_episode(outcome=Outcome.FAILURE, tokens_used=500)
            for _ in range(5)
        ]
        _stub_episodic(reflection, successful + failed)

        factors = await reflection.identify_success_factors(project_id)

        resource_factors = [
            f for f in factors if f.factor_type == FactorType.RESOURCE
        ]
        assert len(resource_factors) >= 1

    async def test_filter_by_task_type(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should filter by task type when specified."""
        project_id = uuid4()

        episodes = [
            create_mock_episode(task_type="target_task", outcome=Outcome.SUCCESS)
            for _ in range(5)
        ]
        mock_episodic = _stub_episodic(reflection, episodes)
        mock_episodic.get_by_task_type = AsyncMock(return_value=episodes)

        await reflection.identify_success_factors(
            project_id,
            task_type="target_task",
        )

        mock_episodic.get_by_task_type.assert_called_once()

    async def test_insufficient_samples(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return empty when insufficient samples."""
        project_id = uuid4()

        # Only 2 episodes, config requires 3 minimum
        episodes = [create_mock_episode() for _ in range(2)]
        _stub_episodic(reflection, episodes)

        factors = await reflection.identify_success_factors(project_id)

        assert factors == []


class TestAnomalyDetection:
    """Tests for anomaly detection."""

    async def test_detect_duration_anomaly(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect unusual duration anomalies."""
        project_id = uuid4()

        # Create baseline with consistent durations
        now = datetime.now(UTC)
        baseline = [
            create_mock_episode(
                duration_seconds=60.0,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(2, 10)
        ]
        # Add recent anomaly with very long duration
        anomalous = create_mock_episode(
            duration_seconds=300.0,  # 5x longer
            occurred_at=now - timedelta(hours=1),
        )
        _stub_episodic(reflection, [*baseline, anomalous])

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        duration_anomalies = [
            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_DURATION
        ]
        assert len(duration_anomalies) >= 1

    async def test_detect_unexpected_outcome_anomaly(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect unexpected outcome anomalies."""
        project_id = uuid4()
        now = datetime.now(UTC)

        # Create baseline with high success rate
        baseline = [
            create_mock_episode(
                task_type="reliable_task",
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(2, 10)
        ]
        # Add recent failure for usually successful task
        anomalous = create_mock_episode(
            task_type="reliable_task",
            outcome=Outcome.FAILURE,
            occurred_at=now - timedelta(hours=1),
        )
        _stub_episodic(reflection, [*baseline, anomalous])

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        outcome_anomalies = [
            a for a in anomalies if a.anomaly_type == AnomalyType.UNEXPECTED_OUTCOME
        ]
        assert len(outcome_anomalies) >= 1

    async def test_detect_token_usage_anomaly(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect unusual token usage."""
        project_id = uuid4()
        now = datetime.now(UTC)

        # Create baseline with consistent token usage
        baseline = [
            create_mock_episode(
                tokens_used=100,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(2, 10)
        ]
        # Add recent anomaly with very high token usage
        anomalous = create_mock_episode(
            tokens_used=1000,  # 10x higher
            occurred_at=now - timedelta(hours=1),
        )
        _stub_episodic(reflection, [*baseline, anomalous])

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        token_anomalies = [
            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_TOKEN_USAGE
        ]
        assert len(token_anomalies) >= 1

    async def test_detect_failure_rate_spike(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect failure rate spikes."""
        project_id = uuid4()
        now = datetime.now(UTC)

        # Create baseline with low failure rate: one episode per day over the
        # last 30 days, failing only on every tenth index.
        baseline = [
            create_mock_episode(
                outcome=Outcome.SUCCESS if i % 10 != 0 else Outcome.FAILURE,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(30)
        ]
        # Add recent failures (spike)
        recent_failures = [
            create_mock_episode(
                outcome=Outcome.FAILURE,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(1, 6)
        ]
        _stub_episodic(reflection, baseline + recent_failures)

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        # May or may not detect based on thresholds
        # Just verify the analysis completes without error
        assert isinstance(anomalies, list)

    async def test_insufficient_baseline(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return empty when insufficient baseline."""
        project_id = uuid4()

        # Only 3 episodes, config requires 5 minimum
        episodes = [create_mock_episode() for _ in range(3)]
        _stub_episodic(reflection, episodes)

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        assert anomalies == []


class TestInsightGeneration:
    """Tests for insight generation."""

    async def test_generate_warning_insight_from_failure_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate warning insight from failure patterns."""
        project_id = uuid4()

        # Create episodes with recurring failure
        episodes = [
            create_mock_episode(task_type="failing_task", outcome=Outcome.FAILURE)
            for _ in range(8)
        ] + [
            create_mock_episode(task_type="failing_task", outcome=Outcome.SUCCESS)
            for _ in range(2)
        ]
        _stub_episodic(reflection, episodes)

        insights = await reflection.generate_insights(project_id)

        warning_insights = [
            i for i in insights if i.insight_type == InsightType.WARNING
        ]
        assert len(warning_insights) >= 1

    async def test_generate_learning_insight_from_success_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate learning insight from success patterns."""
        project_id = uuid4()

        # Create episodes with recurring success
        episodes = [
            create_mock_episode(task_type="good_task", outcome=Outcome.SUCCESS)
            for _ in range(9)
        ] + [
            create_mock_episode(task_type="good_task", outcome=Outcome.FAILURE)
            for _ in range(1)
        ]
        _stub_episodic(reflection, episodes)

        insights = await reflection.generate_insights(project_id)

        learning_insights = [
            i for i in insights if i.insight_type == InsightType.LEARNING
        ]
        # NOTE(review): this assertion cannot fail; the test only verifies
        # that generation and filtering complete. Tighten once the expected
        # LEARNING threshold behaviour is pinned down.
        assert len(learning_insights) >= 0  # May depend on thresholds

    async def test_generate_trend_insight(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate overall trend insight."""
        project_id = uuid4()

        # Create enough episodes with timestamps in range
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(10)
        ]
        _stub_episodic(reflection, episodes)

        insights = await reflection.generate_insights(project_id)

        trend_insights = [i for i in insights if i.insight_type == InsightType.TREND]
        assert len(trend_insights) >= 1

    async def test_insights_sorted_by_priority(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should sort insights by priority."""
        project_id = uuid4()

        episodes = [create_mock_episode(outcome=Outcome.SUCCESS) for _ in range(10)]
        _stub_episodic(reflection, episodes)

        insights = await reflection.generate_insights(project_id)

        # Priorities must be non-increasing (highest first). Comparing with
        # the sorted list also covers the zero- and one-insight cases.
        priorities = [insight.priority for insight in insights]
        assert priorities == sorted(priorities, reverse=True)


class TestComprehensiveReflection:
    """Tests for comprehensive reflect() method."""

    async def test_reflect_returns_all_components(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return patterns, factors, anomalies, and insights."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Alternate success/failure across the last 20 hours.
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                task_type="test_task",
                outcome=Outcome.SUCCESS if i % 2 == 0 else Outcome.FAILURE,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(20)
        ]
        _stub_episodic(reflection, episodes)

        result = await reflection.reflect(project_id, time_range)

        assert result.patterns is not None
        assert result.factors is not None
        assert result.anomalies is not None
        assert result.insights is not None
        assert result.episodes_analyzed >= 0
        assert result.analysis_duration_seconds >= 0

    async def test_reflect_with_default_time_range(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should use default 7-day time range."""
        project_id = uuid4()

        episodes = [create_mock_episode() for _ in range(5)]
        _stub_episodic(reflection, episodes)

        result = await reflection.reflect(project_id)

        # Tolerance band rather than equality: the duration is a float.
        assert 6.9 <= result.time_range.duration_days <= 7.1

    async def test_reflect_summary(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate meaningful summary."""
        project_id = uuid4()

        episodes = [create_mock_episode() for _ in range(10)]
        _stub_episodic(reflection, episodes)

        result = await reflection.reflect(project_id)

        summary = result.summary
        assert "Reflection Analysis" in summary
        assert "Episodes analyzed" in summary


class TestFactoryFunction:
    """Tests for factory function behavior.

    Note: The singleton pattern was removed to avoid stale database session
    bugs. Each call now creates a fresh instance, which is safer for
    request-scoped usage.
    """

    async def test_get_memory_reflection_creates_new_instance(
        self,
        mock_session: MagicMock,
    ) -> None:
        """Should create new instance each call (no singleton for session safety)."""
        r1 = await get_memory_reflection(mock_session)
        r2 = await get_memory_reflection(mock_session)

        # Different instances to avoid stale session issues
        assert r1 is not r2

    async def test_reset_is_no_op(
        self,
        mock_session: MagicMock,
    ) -> None:
        """Reset should be a no-op (kept for API compatibility)."""
        r1 = await get_memory_reflection(mock_session)
        await reset_memory_reflection()  # Should not raise
        r2 = await get_memory_reflection(mock_session)

        # Still creates new instances (reset is no-op now)
        assert r1 is not r2