Felipe Cardoso 1670e05e0d feat(memory): implement memory consolidation service and tasks (#95)
- Add MemoryConsolidationService with Working→Episodic→Semantic/Procedural transfer
- Add Celery tasks for session and nightly consolidation
- Implement memory pruning with importance-based retention
- Add comprehensive test suite (32 tests)

# tests/unit/services/memory/consolidation/test_service.py
"""Unit tests for memory consolidation service."""
from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
from uuid import uuid4
import pytest
from app.services.memory.consolidation.service import (
ConsolidationConfig,
ConsolidationResult,
MemoryConsolidationService,
NightlyConsolidationResult,
SessionConsolidationResult,
)
from app.services.memory.types import Episode, Outcome, TaskState
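# How the service under test is typically driven, as a minimal sketch inferred
# from the tests in this module (variable names here are illustrative only):
#
#   service = MemoryConsolidationService(session=db_session, config=ConsolidationConfig())
#   session_result = await service.consolidate_session(
#       working_memory=working_memory, project_id=project_id, session_id=session_id
#   )
#   nightly_result = await service.run_nightly_consolidation(project_id=project_id)
#
# consolidate_session turns working-memory state into an Episode; the nightly run
# extracts semantic facts and procedures from episodes and prunes old ones.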
def _utcnow() -> datetime:
"""Get current UTC time."""
return datetime.now(UTC)
def make_episode(
outcome: Outcome = Outcome.SUCCESS,
occurred_at: datetime | None = None,
task_type: str = "test_task",
lessons_learned: list[str] | None = None,
importance_score: float = 0.5,
actions: list[dict] | None = None,
) -> Episode:
"""Create a test episode."""
return Episode(
id=uuid4(),
project_id=uuid4(),
agent_instance_id=uuid4(),
agent_type_id=uuid4(),
session_id="test-session",
task_type=task_type,
task_description="Test task description",
actions=actions or [{"action": "test"}],
context_summary="Test context",
outcome=outcome,
outcome_details="Test outcome",
duration_seconds=10.0,
tokens_used=100,
lessons_learned=lessons_learned or [],
importance_score=importance_score,
embedding=None,
occurred_at=occurred_at or _utcnow(),
created_at=_utcnow(),
updated_at=_utcnow(),
)
def make_task_state(
current_step: int = 5,
total_steps: int = 10,
progress_percent: float = 50.0,
status: str = "in_progress",
description: str = "Test Task",
) -> TaskState:
"""Create a test task state."""
now = _utcnow()
return TaskState(
task_id="test-task-id",
task_type="test_task",
description=description,
current_step=current_step,
total_steps=total_steps,
status=status,
progress_percent=progress_percent,
started_at=now - timedelta(hours=1),
updated_at=now,
)
class TestConsolidationConfig:
"""Tests for ConsolidationConfig."""
def test_default_values(self) -> None:
"""Test default configuration values."""
config = ConsolidationConfig()
assert config.min_steps_for_episode == 2
assert config.min_duration_seconds == 5.0
assert config.min_confidence_for_fact == 0.6
assert config.max_facts_per_episode == 10
assert config.min_episodes_for_procedure == 3
assert config.max_episode_age_days == 90
assert config.batch_size == 100
def test_custom_values(self) -> None:
"""Test custom configuration values."""
config = ConsolidationConfig(
min_steps_for_episode=5,
batch_size=50,
)
assert config.min_steps_for_episode == 5
assert config.batch_size == 50
class TestConsolidationResult:
"""Tests for ConsolidationResult."""
def test_creation(self) -> None:
"""Test creating a consolidation result."""
result = ConsolidationResult(
source_type="episodic",
target_type="semantic",
items_processed=10,
items_created=5,
)
assert result.source_type == "episodic"
assert result.target_type == "semantic"
assert result.items_processed == 10
assert result.items_created == 5
assert result.items_skipped == 0
assert result.errors == []
def test_to_dict(self) -> None:
"""Test converting to dictionary."""
result = ConsolidationResult(
source_type="episodic",
target_type="semantic",
items_processed=10,
items_created=5,
errors=["test error"],
)
d = result.to_dict()
assert d["source_type"] == "episodic"
assert d["target_type"] == "semantic"
assert d["items_processed"] == 10
assert d["items_created"] == 5
assert "test error" in d["errors"]
class TestSessionConsolidationResult:
"""Tests for SessionConsolidationResult."""
def test_creation(self) -> None:
"""Test creating a session consolidation result."""
result = SessionConsolidationResult(
session_id="test-session",
episode_created=True,
episode_id=uuid4(),
scratchpad_entries=5,
)
assert result.session_id == "test-session"
assert result.episode_created is True
assert result.episode_id is not None
class TestNightlyConsolidationResult:
"""Tests for NightlyConsolidationResult."""
def test_creation(self) -> None:
"""Test creating a nightly consolidation result."""
result = NightlyConsolidationResult(
started_at=_utcnow(),
)
assert result.started_at is not None
assert result.completed_at is None
assert result.total_episodes_processed == 0
def test_to_dict(self) -> None:
"""Test converting to dictionary."""
result = NightlyConsolidationResult(
started_at=_utcnow(),
completed_at=_utcnow(),
total_facts_created=5,
total_procedures_created=2,
)
d = result.to_dict()
assert "started_at" in d
assert "completed_at" in d
assert d["total_facts_created"] == 5
assert d["total_procedures_created"] == 2
class TestMemoryConsolidationService:
"""Tests for MemoryConsolidationService."""
@pytest.fixture
def mock_session(self) -> AsyncMock:
"""Create a mock database session."""
return AsyncMock()
@pytest.fixture
def service(self, mock_session: AsyncMock) -> MemoryConsolidationService:
"""Create a consolidation service with mocked dependencies."""
return MemoryConsolidationService(
session=mock_session,
config=ConsolidationConfig(),
)
# =========================================================================
# Session Consolidation Tests
# =========================================================================
@pytest.mark.asyncio
async def test_consolidate_session_insufficient_steps(
self, service: MemoryConsolidationService
) -> None:
"""Test session not consolidated when insufficient steps."""
mock_working_memory = AsyncMock()
task_state = make_task_state(current_step=1) # Less than min_steps_for_episode
mock_working_memory.get_task_state.return_value = task_state
result = await service.consolidate_session(
working_memory=mock_working_memory,
project_id=uuid4(),
session_id="test-session",
)
assert result.episode_created is False
assert result.episode_id is None
@pytest.mark.asyncio
async def test_consolidate_session_no_task_state(
self, service: MemoryConsolidationService
) -> None:
"""Test session not consolidated when no task state."""
mock_working_memory = AsyncMock()
mock_working_memory.get_task_state.return_value = None
result = await service.consolidate_session(
working_memory=mock_working_memory,
project_id=uuid4(),
session_id="test-session",
)
assert result.episode_created is False
@pytest.mark.asyncio
async def test_consolidate_session_success(
self, service: MemoryConsolidationService, mock_session: AsyncMock
) -> None:
"""Test successful session consolidation."""
mock_working_memory = AsyncMock()
task_state = make_task_state(
current_step=5,
progress_percent=100.0,
status="complete",
)
mock_working_memory.get_task_state.return_value = task_state
mock_working_memory.get_scratchpad.return_value = ["step1", "step2"]
mock_working_memory.get_all.return_value = {"key1": "value1"}
# Mock episodic memory
mock_episode = make_episode()
with patch.object(
service, "_get_episodic", new_callable=AsyncMock
) as mock_get_episodic:
mock_episodic = AsyncMock()
mock_episodic.record_episode.return_value = mock_episode
mock_get_episodic.return_value = mock_episodic
result = await service.consolidate_session(
working_memory=mock_working_memory,
project_id=uuid4(),
session_id="test-session",
)
assert result.episode_created is True
assert result.episode_id == mock_episode.id
assert result.scratchpad_entries == 2
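# The happy path above suggests consolidate_session roughly does the following
# (a sketch based on the mocked calls, not the actual implementation):
#
#   task_state = await working_memory.get_task_state()
#   if task_state is None or it has fewer than min_steps_for_episode steps:
#       return SessionConsolidationResult(..., episode_created=False)
#   scratchpad = await working_memory.get_scratchpad()
#   variables = await working_memory.get_all()
#   episode = await episodic.record_episode(...)     # built from the session data
#   return SessionConsolidationResult(..., episode_created=True, episode_id=episode.id)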
# =========================================================================
# Outcome Determination Tests
# =========================================================================
def test_determine_session_outcome_success(
self, service: MemoryConsolidationService
) -> None:
"""Test outcome determination for successful session."""
task_state = make_task_state(status="complete", progress_percent=100.0)
outcome = service._determine_session_outcome(task_state)
assert outcome == Outcome.SUCCESS
def test_determine_session_outcome_failure(
self, service: MemoryConsolidationService
) -> None:
"""Test outcome determination for failed session."""
task_state = make_task_state(status="error", progress_percent=25.0)
outcome = service._determine_session_outcome(task_state)
assert outcome == Outcome.FAILURE
def test_determine_session_outcome_partial(
self, service: MemoryConsolidationService
) -> None:
"""Test outcome determination for partial session."""
task_state = make_task_state(status="stopped", progress_percent=60.0)
outcome = service._determine_session_outcome(task_state)
assert outcome == Outcome.PARTIAL
def test_determine_session_outcome_none(
self, service: MemoryConsolidationService
) -> None:
"""Test outcome determination with no task state."""
outcome = service._determine_session_outcome(None)
assert outcome == Outcome.PARTIAL
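# Taken together, the four cases above pin down the expected mapping:
# "complete" at full progress -> SUCCESS, "error" -> FAILURE, a stopped or
# partially progressed session -> PARTIAL, and no task state at all falls back
# to PARTIAL as a conservative default.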
# =========================================================================
# Action Building Tests
# =========================================================================
def test_build_actions_from_session(
self, service: MemoryConsolidationService
) -> None:
"""Test building actions from session data."""
scratchpad = ["thought 1", "thought 2"]
variables = {"var1": "value1"}
task_state = make_task_state()
actions = service._build_actions_from_session(scratchpad, variables, task_state)
assert len(actions) == 3 # 2 scratchpad + 1 final state
assert actions[0]["type"] == "reasoning"
assert actions[2]["type"] == "final_state"
def test_build_context_summary(self, service: MemoryConsolidationService) -> None:
"""Test building context summary."""
task_state = make_task_state(
description="Test Task",
progress_percent=75.0,
)
variables = {"key": "value"}
summary = service._build_context_summary(task_state, variables)
assert "Test Task" in summary
assert "75.0%" in summary
# =========================================================================
# Importance Calculation Tests
# =========================================================================
def test_calculate_session_importance_base(
self, service: MemoryConsolidationService
) -> None:
"""Test base importance calculation."""
task_state = make_task_state(total_steps=3) # Below threshold
importance = service._calculate_session_importance(
task_state, Outcome.SUCCESS, []
)
assert importance == 0.5 # Base score
def test_calculate_session_importance_failure(
self, service: MemoryConsolidationService
) -> None:
"""Test importance boost for failures."""
task_state = make_task_state(total_steps=3) # Below threshold
importance = service._calculate_session_importance(
task_state, Outcome.FAILURE, []
)
assert importance == pytest.approx(0.8)  # Base (0.5) + failure boost (0.3)
def test_calculate_session_importance_complex(
self, service: MemoryConsolidationService
) -> None:
"""Test importance for complex session."""
task_state = make_task_state(total_steps=10)
actions = [{"step": i} for i in range(6)]
importance = service._calculate_session_importance(
task_state, Outcome.SUCCESS, actions
)
# Base (0.5) + many steps (0.1) + many actions (0.1)
assert importance == pytest.approx(0.7)
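# Presumed scoring model behind the three assertions above (a sketch only; the
# real _calculate_session_importance may weight or clamp differently):
#
#   score = 0.5                                    # base
#   if outcome is Outcome.FAILURE:   score += 0.3  # failures are worth remembering
#   if the task has many steps:      score += 0.1  # threshold lies between 3 and 10 here
#   if many actions were recorded:   score += 0.1  # six actions qualify in the fixture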
# =========================================================================
# Episode to Fact Consolidation Tests
# =========================================================================
@pytest.mark.asyncio
async def test_consolidate_episodes_to_facts_empty(
self, service: MemoryConsolidationService
) -> None:
"""Test consolidation with no episodes."""
with patch.object(
service, "_get_episodic", new_callable=AsyncMock
) as mock_get_episodic:
mock_episodic = AsyncMock()
mock_episodic.get_recent.return_value = []
mock_get_episodic.return_value = mock_episodic
result = await service.consolidate_episodes_to_facts(
project_id=uuid4(),
)
assert result.items_processed == 0
assert result.items_created == 0
@pytest.mark.asyncio
async def test_consolidate_episodes_to_facts_success(
self, service: MemoryConsolidationService
) -> None:
"""Test successful fact extraction."""
episode = make_episode(
lessons_learned=["Always check return values"],
)
mock_fact = MagicMock()
mock_fact.reinforcement_count = 1 # New fact
with (
patch.object(
service, "_get_episodic", new_callable=AsyncMock
) as mock_get_episodic,
patch.object(
service, "_get_semantic", new_callable=AsyncMock
) as mock_get_semantic,
):
mock_episodic = AsyncMock()
mock_episodic.get_recent.return_value = [episode]
mock_get_episodic.return_value = mock_episodic
mock_semantic = AsyncMock()
mock_semantic.store_fact.return_value = mock_fact
mock_get_semantic.return_value = mock_semantic
result = await service.consolidate_episodes_to_facts(
project_id=uuid4(),
)
assert result.items_processed == 1
# Fact extraction from lessons is heuristic, so only assert a non-negative count
assert result.items_created >= 0
# =========================================================================
# Episode to Procedure Consolidation Tests
# =========================================================================
@pytest.mark.asyncio
async def test_consolidate_episodes_to_procedures_insufficient(
self, service: MemoryConsolidationService
) -> None:
"""Test consolidation with insufficient episodes."""
# Only 1 episode - less than min_episodes_for_procedure (3)
episode = make_episode()
with patch.object(
service, "_get_episodic", new_callable=AsyncMock
) as mock_get_episodic:
mock_episodic = AsyncMock()
mock_episodic.get_by_outcome.return_value = [episode]
mock_get_episodic.return_value = mock_episodic
result = await service.consolidate_episodes_to_procedures(
project_id=uuid4(),
)
assert result.items_processed == 1
assert result.items_created == 0
assert result.items_skipped == 1
@pytest.mark.asyncio
async def test_consolidate_episodes_to_procedures_success(
self, service: MemoryConsolidationService
) -> None:
"""Test successful procedure creation."""
# Create enough episodes for a procedure
episodes = [
make_episode(
task_type="deploy",
actions=[{"type": "step1"}, {"type": "step2"}, {"type": "step3"}],
)
for _ in range(5)
]
mock_procedure = MagicMock()
with (
patch.object(
service, "_get_episodic", new_callable=AsyncMock
) as mock_get_episodic,
patch.object(
service, "_get_procedural", new_callable=AsyncMock
) as mock_get_procedural,
):
mock_episodic = AsyncMock()
mock_episodic.get_by_outcome.return_value = episodes
mock_get_episodic.return_value = mock_episodic
mock_procedural = AsyncMock()
mock_procedural.find_matching.return_value = [] # No existing procedure
mock_procedural.record_procedure.return_value = mock_procedure
mock_get_procedural.return_value = mock_procedural
result = await service.consolidate_episodes_to_procedures(
project_id=uuid4(),
)
assert result.items_processed == 5
assert result.items_created == 1
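# The two tests above imply the procedure path behaves roughly like this
# (a sketch based on the mocked repositories, not the implementation):
#
#   episodes = await episodic.get_by_outcome(...)            # successful episodes per task type
#   if len(episodes) < config.min_episodes_for_procedure:    # default 3
#       count them as skipped
#   elif not await procedural.find_matching(...):            # no existing procedure
#       await procedural.record_procedure(...)               # built from the common steps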
# =========================================================================
# Common Steps Extraction Tests
# =========================================================================
def test_extract_common_steps(self, service: MemoryConsolidationService) -> None:
"""Test extracting steps from episodes."""
episodes = [
make_episode(
outcome=Outcome.SUCCESS,
importance_score=0.8,
actions=[
{"type": "step1", "content": "First step"},
{"type": "step2", "content": "Second step"},
],
),
make_episode(
outcome=Outcome.SUCCESS,
importance_score=0.5,
actions=[{"type": "simple"}],
),
]
steps = service._extract_common_steps(episodes)
assert len(steps) == 2
assert steps[0]["order"] == 1
assert steps[0]["action"] == "step1"
# =========================================================================
# Pruning Tests
# =========================================================================
def test_should_prune_episode_old_low_importance(
self, service: MemoryConsolidationService
) -> None:
"""Test pruning old, low-importance episode."""
old_date = _utcnow() - timedelta(days=100)
episode = make_episode(
occurred_at=old_date,
importance_score=0.1,
outcome=Outcome.SUCCESS,
)
cutoff = _utcnow() - timedelta(days=90)
should_prune = service._should_prune_episode(episode, cutoff, 0.2)
assert should_prune is True
def test_should_prune_episode_recent(
self, service: MemoryConsolidationService
) -> None:
"""Test not pruning recent episode."""
recent_date = _utcnow() - timedelta(days=30)
episode = make_episode(
occurred_at=recent_date,
importance_score=0.1,
)
cutoff = _utcnow() - timedelta(days=90)
should_prune = service._should_prune_episode(episode, cutoff, 0.2)
assert should_prune is False
def test_should_prune_episode_failure_protected(
self, service: MemoryConsolidationService
) -> None:
"""Test not pruning failure (with keep_all_failures=True)."""
old_date = _utcnow() - timedelta(days=100)
episode = make_episode(
occurred_at=old_date,
importance_score=0.1,
outcome=Outcome.FAILURE,
)
cutoff = _utcnow() - timedelta(days=90)
should_prune = service._should_prune_episode(episode, cutoff, 0.2)
# Config has keep_all_failures=True by default
assert should_prune is False
def test_should_prune_episode_with_lessons_protected(
self, service: MemoryConsolidationService
) -> None:
"""Test not pruning episode with lessons."""
old_date = _utcnow() - timedelta(days=100)
episode = make_episode(
occurred_at=old_date,
importance_score=0.1,
lessons_learned=["Important lesson"],
)
cutoff = _utcnow() - timedelta(days=90)
should_prune = service._should_prune_episode(episode, cutoff, 0.2)
# Config has keep_all_with_lessons=True by default
assert should_prune is False
def test_should_prune_episode_high_importance_protected(
self, service: MemoryConsolidationService
) -> None:
"""Test not pruning high importance episode."""
old_date = _utcnow() - timedelta(days=100)
episode = make_episode(
occurred_at=old_date,
importance_score=0.8,
)
cutoff = _utcnow() - timedelta(days=90)
should_prune = service._should_prune_episode(episode, cutoff, 0.2)
assert should_prune is False
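# Combined, the five cases above describe the pruning rule exercised here
# (sketch only; keep_all_failures / keep_all_with_lessons are config defaults):
#
#   prune only if  occurred_at is older than the cutoff
#              and importance_score < threshold (0.2 in these tests)
#              and outcome is not FAILURE
#              and there are no lessons_learned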
@pytest.mark.asyncio
async def test_prune_old_episodes(
self, service: MemoryConsolidationService
) -> None:
"""Test episode pruning."""
old_episode = make_episode(
occurred_at=_utcnow() - timedelta(days=100),
importance_score=0.1,
outcome=Outcome.SUCCESS,
lessons_learned=[],
)
with patch.object(
service, "_get_episodic", new_callable=AsyncMock
) as mock_get_episodic:
mock_episodic = AsyncMock()
mock_episodic.get_recent.return_value = [old_episode]
mock_episodic.delete.return_value = True
mock_get_episodic.return_value = mock_episodic
result = await service.prune_old_episodes(project_id=uuid4())
assert result.items_processed == 1
assert result.items_pruned == 1
# =========================================================================
# Nightly Consolidation Tests
# =========================================================================
@pytest.mark.asyncio
async def test_run_nightly_consolidation(
self, service: MemoryConsolidationService
) -> None:
"""Test nightly consolidation workflow."""
with (
patch.object(
service,
"consolidate_episodes_to_facts",
new_callable=AsyncMock,
) as mock_facts,
patch.object(
service,
"consolidate_episodes_to_procedures",
new_callable=AsyncMock,
) as mock_procedures,
patch.object(
service,
"prune_old_episodes",
new_callable=AsyncMock,
) as mock_prune,
):
mock_facts.return_value = ConsolidationResult(
source_type="episodic",
target_type="semantic",
items_processed=10,
items_created=5,
)
mock_procedures.return_value = ConsolidationResult(
source_type="episodic",
target_type="procedural",
items_processed=10,
items_created=2,
)
mock_prune.return_value = ConsolidationResult(
source_type="episodic",
target_type="pruned",
items_pruned=3,
)
result = await service.run_nightly_consolidation(project_id=uuid4())
assert result.completed_at is not None
assert result.total_facts_created == 5
assert result.total_procedures_created == 2
assert result.total_pruned == 3
assert result.total_episodes_processed == 20
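# run_nightly_consolidation fans out to the three steps patched above and
# aggregates their counters: 5 facts, 2 procedures, 3 pruned, with
# total_episodes_processed summing the fact and procedure passes (10 + 10 = 20).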
@pytest.mark.asyncio
async def test_run_nightly_consolidation_with_errors(
self, service: MemoryConsolidationService
) -> None:
"""Test nightly consolidation handles errors."""
with (
patch.object(
service,
"consolidate_episodes_to_facts",
new_callable=AsyncMock,
) as mock_facts,
patch.object(
service,
"consolidate_episodes_to_procedures",
new_callable=AsyncMock,
) as mock_procedures,
patch.object(
service,
"prune_old_episodes",
new_callable=AsyncMock,
) as mock_prune,
):
mock_facts.return_value = ConsolidationResult(
source_type="episodic",
target_type="semantic",
errors=["fact error"],
)
mock_procedures.return_value = ConsolidationResult(
source_type="episodic",
target_type="procedural",
)
mock_prune.return_value = ConsolidationResult(
source_type="episodic",
target_type="pruned",
)
result = await service.run_nightly_consolidation(project_id=uuid4())
assert "fact error" in result.errors