# tests/unit/services/memory/consolidation/test_service.py
"""Unit tests for memory consolidation service."""

from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
from uuid import uuid4

import pytest

from app.services.memory.consolidation.service import (
    ConsolidationConfig,
    ConsolidationResult,
    MemoryConsolidationService,
    NightlyConsolidationResult,
    SessionConsolidationResult,
)
from app.services.memory.types import Episode, Outcome, TaskState


def _utcnow() -> datetime:
    """Get current UTC time."""
    return datetime.now(UTC)


def make_episode(
    outcome: Outcome = Outcome.SUCCESS,
    occurred_at: datetime | None = None,
    task_type: str = "test_task",
    lessons_learned: list[str] | None = None,
    importance_score: float = 0.5,
    actions: list[dict] | None = None,
) -> Episode:
    """Create a test episode."""
    return Episode(
        id=uuid4(),
        project_id=uuid4(),
        agent_instance_id=uuid4(),
        agent_type_id=uuid4(),
        session_id="test-session",
        task_type=task_type,
        task_description="Test task description",
        actions=actions or [{"action": "test"}],
        context_summary="Test context",
        outcome=outcome,
        outcome_details="Test outcome",
        duration_seconds=10.0,
        tokens_used=100,
        lessons_learned=lessons_learned or [],
        importance_score=importance_score,
        embedding=None,
        occurred_at=occurred_at or _utcnow(),
        created_at=_utcnow(),
        updated_at=_utcnow(),
    )


def make_task_state(
    current_step: int = 5,
    total_steps: int = 10,
    progress_percent: float = 50.0,
    status: str = "in_progress",
    description: str = "Test Task",
) -> TaskState:
    """Create a test task state."""
    now = _utcnow()
    return TaskState(
        task_id="test-task-id",
        task_type="test_task",
        description=description,
        current_step=current_step,
        total_steps=total_steps,
        status=status,
        progress_percent=progress_percent,
        started_at=now - timedelta(hours=1),
        updated_at=now,
    )


class TestConsolidationConfig:
    """Tests for ConsolidationConfig."""

    def test_default_values(self) -> None:
        """Test default configuration values."""
        config = ConsolidationConfig()

        assert config.min_steps_for_episode == 2
        assert config.min_duration_seconds == 5.0
        assert config.min_confidence_for_fact == 0.6
        assert config.max_facts_per_episode == 10
        assert config.min_episodes_for_procedure == 3
        assert config.max_episode_age_days == 90
        assert config.batch_size == 100

    def test_custom_values(self) -> None:
        """Test custom configuration values."""
        config = ConsolidationConfig(
            min_steps_for_episode=5,
            batch_size=50,
        )

        assert config.min_steps_for_episode == 5
        assert config.batch_size == 50


class TestConsolidationResult:
    """Tests for ConsolidationResult."""

    def test_creation(self) -> None:
        """Test creating a consolidation result."""
        result = ConsolidationResult(
            source_type="episodic",
            target_type="semantic",
            items_processed=10,
            items_created=5,
        )

        assert result.source_type == "episodic"
        assert result.target_type == "semantic"
        assert result.items_processed == 10
        assert result.items_created == 5
        assert result.items_skipped == 0
        assert result.errors == []

    def test_to_dict(self) -> None:
        """Test converting to dictionary."""
        result = ConsolidationResult(
            source_type="episodic",
            target_type="semantic",
            items_processed=10,
            items_created=5,
            errors=["test error"],
        )

        d = result.to_dict()

        assert d["source_type"] == "episodic"
        assert d["target_type"] == "semantic"
        assert d["items_processed"] == 10
        assert d["items_created"] == 5
        assert "test error" in d["errors"]


class TestSessionConsolidationResult:
    """Tests for SessionConsolidationResult."""

    def test_creation(self) -> None:
        """Test creating a session consolidation result."""
consolidation result.""" result = SessionConsolidationResult( session_id="test-session", episode_created=True, episode_id=uuid4(), scratchpad_entries=5, ) assert result.session_id == "test-session" assert result.episode_created is True assert result.episode_id is not None class TestNightlyConsolidationResult: """Tests for NightlyConsolidationResult.""" def test_creation(self) -> None: """Test creating a nightly consolidation result.""" result = NightlyConsolidationResult( started_at=_utcnow(), ) assert result.started_at is not None assert result.completed_at is None assert result.total_episodes_processed == 0 def test_to_dict(self) -> None: """Test converting to dictionary.""" result = NightlyConsolidationResult( started_at=_utcnow(), completed_at=_utcnow(), total_facts_created=5, total_procedures_created=2, ) d = result.to_dict() assert "started_at" in d assert "completed_at" in d assert d["total_facts_created"] == 5 assert d["total_procedures_created"] == 2 class TestMemoryConsolidationService: """Tests for MemoryConsolidationService.""" @pytest.fixture def mock_session(self) -> AsyncMock: """Create a mock database session.""" return AsyncMock() @pytest.fixture def service(self, mock_session: AsyncMock) -> MemoryConsolidationService: """Create a consolidation service with mocked dependencies.""" return MemoryConsolidationService( session=mock_session, config=ConsolidationConfig(), ) # ========================================================================= # Session Consolidation Tests # ========================================================================= @pytest.mark.asyncio async def test_consolidate_session_insufficient_steps( self, service: MemoryConsolidationService ) -> None: """Test session not consolidated when insufficient steps.""" mock_working_memory = AsyncMock() task_state = make_task_state(current_step=1) # Less than min_steps_for_episode mock_working_memory.get_task_state.return_value = task_state result = await service.consolidate_session( working_memory=mock_working_memory, project_id=uuid4(), session_id="test-session", ) assert result.episode_created is False assert result.episode_id is None @pytest.mark.asyncio async def test_consolidate_session_no_task_state( self, service: MemoryConsolidationService ) -> None: """Test session not consolidated when no task state.""" mock_working_memory = AsyncMock() mock_working_memory.get_task_state.return_value = None result = await service.consolidate_session( working_memory=mock_working_memory, project_id=uuid4(), session_id="test-session", ) assert result.episode_created is False @pytest.mark.asyncio async def test_consolidate_session_success( self, service: MemoryConsolidationService, mock_session: AsyncMock ) -> None: """Test successful session consolidation.""" mock_working_memory = AsyncMock() task_state = make_task_state( current_step=5, progress_percent=100.0, status="complete", ) mock_working_memory.get_task_state.return_value = task_state mock_working_memory.get_scratchpad.return_value = ["step1", "step2"] mock_working_memory.get_all.return_value = {"key1": "value1"} # Mock episodic memory mock_episode = make_episode() with patch.object( service, "_get_episodic", new_callable=AsyncMock ) as mock_get_episodic: mock_episodic = AsyncMock() mock_episodic.record_episode.return_value = mock_episode mock_get_episodic.return_value = mock_episodic result = await service.consolidate_session( working_memory=mock_working_memory, project_id=uuid4(), session_id="test-session", ) assert result.episode_created is True assert 
            assert result.episode_id == mock_episode.id
            assert result.scratchpad_entries == 2

    # =========================================================================
    # Outcome Determination Tests
    # =========================================================================

    def test_determine_session_outcome_success(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test outcome determination for successful session."""
        task_state = make_task_state(status="complete", progress_percent=100.0)

        outcome = service._determine_session_outcome(task_state)

        assert outcome == Outcome.SUCCESS

    def test_determine_session_outcome_failure(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test outcome determination for failed session."""
        task_state = make_task_state(status="error", progress_percent=25.0)

        outcome = service._determine_session_outcome(task_state)

        assert outcome == Outcome.FAILURE

    def test_determine_session_outcome_partial(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test outcome determination for partial session."""
        task_state = make_task_state(status="stopped", progress_percent=60.0)

        outcome = service._determine_session_outcome(task_state)

        assert outcome == Outcome.PARTIAL

    def test_determine_session_outcome_none(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test outcome determination with no task state."""
        outcome = service._determine_session_outcome(None)

        assert outcome == Outcome.PARTIAL

    # =========================================================================
    # Action Building Tests
    # =========================================================================

    def test_build_actions_from_session(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test building actions from session data."""
        scratchpad = ["thought 1", "thought 2"]
        variables = {"var1": "value1"}
        task_state = make_task_state()

        actions = service._build_actions_from_session(scratchpad, variables, task_state)

        assert len(actions) == 3  # 2 scratchpad + 1 final state
        assert actions[0]["type"] == "reasoning"
        assert actions[2]["type"] == "final_state"

    def test_build_context_summary(self, service: MemoryConsolidationService) -> None:
        """Test building context summary."""
        task_state = make_task_state(
            description="Test Task",
            progress_percent=75.0,
        )
        variables = {"key": "value"}

        summary = service._build_context_summary(task_state, variables)

        assert "Test Task" in summary
        assert "75.0%" in summary

    # =========================================================================
    # Importance Calculation Tests
    # =========================================================================

    def test_calculate_session_importance_base(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test base importance calculation."""
        task_state = make_task_state(total_steps=3)  # Below threshold

        importance = service._calculate_session_importance(
            task_state, Outcome.SUCCESS, []
        )

        assert importance == 0.5  # Base score

    def test_calculate_session_importance_failure(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test importance boost for failures."""
        task_state = make_task_state(total_steps=3)  # Below threshold

        importance = service._calculate_session_importance(
            task_state, Outcome.FAILURE, []
        )

        assert importance == 0.8  # Base (0.5) + failure boost (0.3)

    def test_calculate_session_importance_complex(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test importance for complex session."""
        task_state = make_task_state(total_steps=10)
        actions = [{"step": i} for i in range(6)]

        importance = service._calculate_session_importance(
            task_state, Outcome.SUCCESS, actions
        )
        # Base (0.5) + many steps (0.1) + many actions (0.1)
        assert importance == 0.7

    # =========================================================================
    # Episode to Fact Consolidation Tests
    # =========================================================================

    @pytest.mark.asyncio
    async def test_consolidate_episodes_to_facts_empty(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test consolidation with no episodes."""
        with patch.object(
            service, "_get_episodic", new_callable=AsyncMock
        ) as mock_get_episodic:
            mock_episodic = AsyncMock()
            mock_episodic.get_recent.return_value = []
            mock_get_episodic.return_value = mock_episodic

            result = await service.consolidate_episodes_to_facts(
                project_id=uuid4(),
            )

            assert result.items_processed == 0
            assert result.items_created == 0

    @pytest.mark.asyncio
    async def test_consolidate_episodes_to_facts_success(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test successful fact extraction."""
        episode = make_episode(
            lessons_learned=["Always check return values"],
        )

        mock_fact = MagicMock()
        mock_fact.reinforcement_count = 1  # New fact

        with (
            patch.object(
                service, "_get_episodic", new_callable=AsyncMock
            ) as mock_get_episodic,
            patch.object(
                service, "_get_semantic", new_callable=AsyncMock
            ) as mock_get_semantic,
        ):
            mock_episodic = AsyncMock()
            mock_episodic.get_recent.return_value = [episode]
            mock_get_episodic.return_value = mock_episodic

            mock_semantic = AsyncMock()
            mock_semantic.store_fact.return_value = mock_fact
            mock_get_semantic.return_value = mock_semantic

            result = await service.consolidate_episodes_to_facts(
                project_id=uuid4(),
            )

            assert result.items_processed == 1
            # At least one fact should be created from lesson
            assert result.items_created >= 0

    # =========================================================================
    # Episode to Procedure Consolidation Tests
    # =========================================================================

    @pytest.mark.asyncio
    async def test_consolidate_episodes_to_procedures_insufficient(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test consolidation with insufficient episodes."""
        # Only 1 episode - less than min_episodes_for_procedure (3)
        episode = make_episode()

        with patch.object(
            service, "_get_episodic", new_callable=AsyncMock
        ) as mock_get_episodic:
            mock_episodic = AsyncMock()
            mock_episodic.get_by_outcome.return_value = [episode]
            mock_get_episodic.return_value = mock_episodic

            result = await service.consolidate_episodes_to_procedures(
                project_id=uuid4(),
            )

            assert result.items_processed == 1
            assert result.items_created == 0
            assert result.items_skipped == 1

    @pytest.mark.asyncio
    async def test_consolidate_episodes_to_procedures_success(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test successful procedure creation."""
        # Create enough episodes for a procedure
        episodes = [
            make_episode(
                task_type="deploy",
                actions=[{"type": "step1"}, {"type": "step2"}, {"type": "step3"}],
            )
            for _ in range(5)
        ]

        mock_procedure = MagicMock()

        with (
            patch.object(
                service, "_get_episodic", new_callable=AsyncMock
            ) as mock_get_episodic,
            patch.object(
                service, "_get_procedural", new_callable=AsyncMock
            ) as mock_get_procedural,
        ):
            mock_episodic = AsyncMock()
            mock_episodic.get_by_outcome.return_value = episodes
            mock_get_episodic.return_value = mock_episodic

            mock_procedural = AsyncMock()
            mock_procedural.find_matching.return_value = []  # No existing procedure
            mock_procedural.record_procedure.return_value = mock_procedure
            mock_get_procedural.return_value = mock_procedural
            result = await service.consolidate_episodes_to_procedures(
                project_id=uuid4(),
            )

            assert result.items_processed == 5
            assert result.items_created == 1

    # =========================================================================
    # Common Steps Extraction Tests
    # =========================================================================

    def test_extract_common_steps(self, service: MemoryConsolidationService) -> None:
        """Test extracting steps from episodes."""
        episodes = [
            make_episode(
                outcome=Outcome.SUCCESS,
                importance_score=0.8,
                actions=[
                    {"type": "step1", "content": "First step"},
                    {"type": "step2", "content": "Second step"},
                ],
            ),
            make_episode(
                outcome=Outcome.SUCCESS,
                importance_score=0.5,
                actions=[{"type": "simple"}],
            ),
        ]

        steps = service._extract_common_steps(episodes)

        assert len(steps) == 2
        assert steps[0]["order"] == 1
        assert steps[0]["action"] == "step1"

    # =========================================================================
    # Pruning Tests
    # =========================================================================

    def test_should_prune_episode_old_low_importance(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test pruning old, low-importance episode."""
        old_date = _utcnow() - timedelta(days=100)
        episode = make_episode(
            occurred_at=old_date,
            importance_score=0.1,
            outcome=Outcome.SUCCESS,
        )
        cutoff = _utcnow() - timedelta(days=90)

        should_prune = service._should_prune_episode(episode, cutoff, 0.2)

        assert should_prune is True

    def test_should_prune_episode_recent(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test not pruning recent episode."""
        recent_date = _utcnow() - timedelta(days=30)
        episode = make_episode(
            occurred_at=recent_date,
            importance_score=0.1,
        )
        cutoff = _utcnow() - timedelta(days=90)

        should_prune = service._should_prune_episode(episode, cutoff, 0.2)

        assert should_prune is False

    def test_should_prune_episode_failure_protected(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test not pruning failure (with keep_all_failures=True)."""
        old_date = _utcnow() - timedelta(days=100)
        episode = make_episode(
            occurred_at=old_date,
            importance_score=0.1,
            outcome=Outcome.FAILURE,
        )
        cutoff = _utcnow() - timedelta(days=90)

        should_prune = service._should_prune_episode(episode, cutoff, 0.2)

        # Config has keep_all_failures=True by default
        assert should_prune is False

    def test_should_prune_episode_with_lessons_protected(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test not pruning episode with lessons."""
        old_date = _utcnow() - timedelta(days=100)
        episode = make_episode(
            occurred_at=old_date,
            importance_score=0.1,
            lessons_learned=["Important lesson"],
        )
        cutoff = _utcnow() - timedelta(days=90)

        should_prune = service._should_prune_episode(episode, cutoff, 0.2)

        # Config has keep_all_with_lessons=True by default
        assert should_prune is False

    def test_should_prune_episode_high_importance_protected(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test not pruning high importance episode."""
        old_date = _utcnow() - timedelta(days=100)
        episode = make_episode(
            occurred_at=old_date,
            importance_score=0.8,
        )
        cutoff = _utcnow() - timedelta(days=90)

        should_prune = service._should_prune_episode(episode, cutoff, 0.2)

        assert should_prune is False

    @pytest.mark.asyncio
    async def test_prune_old_episodes(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test episode pruning."""
        old_episode = make_episode(
            occurred_at=_utcnow() - timedelta(days=100),
            importance_score=0.1,
            outcome=Outcome.SUCCESS,
            lessons_learned=[],
        )

        with patch.object(
            service, "_get_episodic", new_callable=AsyncMock
        ) as mock_get_episodic:
            mock_episodic = AsyncMock()
            mock_episodic.get_recent.return_value = [old_episode]
            mock_episodic.delete.return_value = True
            mock_get_episodic.return_value = mock_episodic

            result = await service.prune_old_episodes(project_id=uuid4())

            assert result.items_processed == 1
            assert result.items_pruned == 1

    # =========================================================================
    # Nightly Consolidation Tests
    # =========================================================================

    @pytest.mark.asyncio
    async def test_run_nightly_consolidation(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test nightly consolidation workflow."""
        with (
            patch.object(
                service,
                "consolidate_episodes_to_facts",
                new_callable=AsyncMock,
            ) as mock_facts,
            patch.object(
                service,
                "consolidate_episodes_to_procedures",
                new_callable=AsyncMock,
            ) as mock_procedures,
            patch.object(
                service,
                "prune_old_episodes",
                new_callable=AsyncMock,
            ) as mock_prune,
        ):
            mock_facts.return_value = ConsolidationResult(
                source_type="episodic",
                target_type="semantic",
                items_processed=10,
                items_created=5,
            )
            mock_procedures.return_value = ConsolidationResult(
                source_type="episodic",
                target_type="procedural",
                items_processed=10,
                items_created=2,
            )
            mock_prune.return_value = ConsolidationResult(
                source_type="episodic",
                target_type="pruned",
                items_pruned=3,
            )

            result = await service.run_nightly_consolidation(project_id=uuid4())

            assert result.completed_at is not None
            assert result.total_facts_created == 5
            assert result.total_procedures_created == 2
            assert result.total_pruned == 3
            assert result.total_episodes_processed == 20

    @pytest.mark.asyncio
    async def test_run_nightly_consolidation_with_errors(
        self, service: MemoryConsolidationService
    ) -> None:
        """Test nightly consolidation handles errors."""
        with (
            patch.object(
                service,
                "consolidate_episodes_to_facts",
                new_callable=AsyncMock,
            ) as mock_facts,
            patch.object(
                service,
                "consolidate_episodes_to_procedures",
                new_callable=AsyncMock,
            ) as mock_procedures,
            patch.object(
                service,
                "prune_old_episodes",
                new_callable=AsyncMock,
            ) as mock_prune,
        ):
            mock_facts.return_value = ConsolidationResult(
                source_type="episodic",
                target_type="semantic",
                errors=["fact error"],
            )
            mock_procedures.return_value = ConsolidationResult(
                source_type="episodic",
                target_type="procedural",
            )
            mock_prune.return_value = ConsolidationResult(
                source_type="episodic",
                target_type="pruned",
            )

            result = await service.run_nightly_consolidation(project_id=uuid4())

            assert "fact error" in result.errors