# tests/unit/services/memory/reflection/test_service.py

"""Tests for Memory Reflection service."""

from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4

import pytest

from app.services.memory.reflection.service import (
    MemoryReflection,
    ReflectionConfig,
    get_memory_reflection,
    reset_memory_reflection,
)
from app.services.memory.reflection.types import (
    AnomalyType,
    FactorType,
    InsightType,
    PatternType,
    TimeRange,
)
from app.services.memory.types import Episode, Outcome

pytestmark = pytest.mark.asyncio(loop_scope="function")
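# loop_scope="function" makes pytest-asyncio run each async test in its own
# event loop, so no loop-level state can leak between tests.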


def create_mock_episode(
    task_type: str = "test_task",
    outcome: Outcome = Outcome.SUCCESS,
    duration_seconds: float = 60.0,
    tokens_used: int = 100,
    actions: list | None = None,
    occurred_at: datetime | None = None,
    context_summary: str = "Test context",
) -> Episode:
    """Create a mock episode for testing."""
    return Episode(
        id=uuid4(),
        project_id=uuid4(),
        agent_instance_id=None,
        agent_type_id=None,
        session_id="session-123",
        task_type=task_type,
        task_description=f"Test {task_type}",
        actions=actions or [{"type": "action1", "content": "test"}],
        context_summary=context_summary,
        outcome=outcome,
        outcome_details="",
        duration_seconds=duration_seconds,
        tokens_used=tokens_used,
        lessons_learned=[],
        importance_score=0.5,
        embedding=None,
        occurred_at=occurred_at or datetime.now(UTC),
        created_at=datetime.now(UTC),
        updated_at=datetime.now(UTC),
    )

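
# Fixtures: a MagicMock stands in for the real database session (the service is
# never allowed to touch a database in these unit tests), and the config lowers
# the detection thresholds so the small synthetic datasets below are enough to
# trigger patterns, factors, and anomalies.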
@pytest.fixture(autouse=True)
async def reset_singleton() -> None:
    """Reset reflection state before each test (now a no-op kept for compatibility)."""
    await reset_memory_reflection()


@pytest.fixture
def mock_session() -> MagicMock:
    """Create mock database session."""
    return MagicMock()


@pytest.fixture
def config() -> ReflectionConfig:
    """Create test configuration."""
    return ReflectionConfig(
        min_pattern_occurrences=2,
        min_pattern_confidence=0.5,
        min_sample_size_for_factor=3,
        min_correlation_for_factor=0.2,
        min_baseline_samples=5,
        anomaly_std_dev_threshold=2.0,
        min_insight_confidence=0.1,  # Lower than the default, for testing
    )


@pytest.fixture
def reflection(mock_session: MagicMock, config: ReflectionConfig) -> MemoryReflection:
    """Create reflection service."""
    return MemoryReflection(session=mock_session, config=config)


class TestReflectionConfig:
    """Tests for ReflectionConfig."""

    def test_default_values(self) -> None:
        """Should have sensible defaults."""
        config = ReflectionConfig()

        assert config.min_pattern_occurrences == 3
        assert config.min_pattern_confidence == 0.6
        assert config.min_sample_size_for_factor == 5
        assert config.anomaly_std_dev_threshold == 2.0
        assert config.max_episodes_to_analyze == 1000

    def test_custom_values(self) -> None:
        """Should allow custom values."""
        config = ReflectionConfig(
            min_pattern_occurrences=5,
            min_pattern_confidence=0.8,
        )

        assert config.min_pattern_occurrences == 5
        assert config.min_pattern_confidence == 0.8

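
# Pattern detection is exercised with synthetic episode mixes. With the test
# config above (min_pattern_occurrences=2, min_pattern_confidence=0.5), an
# 8-success / 2-failure mix for one task type should clear both thresholds,
# assuming the reported confidence tracks the observed success (or failure)
# rate.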
class TestPatternDetection:
    """Tests for pattern detection."""

    async def test_detect_recurring_success_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect recurring success patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Create episodes with a high success rate for a task type,
        # ensuring the timestamps fall within the time range
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                task_type="build",
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(8)
        ] + [
            create_mock_episode(
                task_type="build",
                outcome=Outcome.FAILURE,
                occurred_at=now - timedelta(hours=8 + i),
            )
            for i in range(2)
        ]

        # Mock episodic memory
        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        patterns = await reflection.analyze_patterns(project_id, time_range)

        # Should find a recurring success pattern for the 'build' task
        success_patterns = [
            p for p in patterns if p.pattern_type == PatternType.RECURRING_SUCCESS
        ]
        assert len(success_patterns) >= 1
        assert any("build" in p.name for p in success_patterns)

    async def test_detect_recurring_failure_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect recurring failure patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Create episodes with a high failure rate,
        # ensuring the timestamps fall within the time range
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                task_type="deploy",
                outcome=Outcome.FAILURE,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(7)
        ] + [
            create_mock_episode(
                task_type="deploy",
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(hours=7 + i),
            )
            for i in range(3)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        patterns = await reflection.analyze_patterns(project_id, time_range)

        failure_patterns = [
            p for p in patterns if p.pattern_type == PatternType.RECURRING_FAILURE
        ]
        assert len(failure_patterns) >= 1

    async def test_detect_action_sequence_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect action sequence patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Create episodes sharing the same action sequence,
        # ensuring the timestamps fall within the time range
        now = datetime.now(UTC)
        actions = [
            {"type": "read_file"},
            {"type": "analyze"},
            {"type": "write_file"},
        ]
        episodes = [
            create_mock_episode(
                actions=actions,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(5)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        patterns = await reflection.analyze_patterns(project_id, time_range)

        action_patterns = [
            p for p in patterns if p.pattern_type == PatternType.ACTION_SEQUENCE
        ]
        assert len(action_patterns) >= 1

    async def test_detect_temporal_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect temporal patterns."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        # Create episodes concentrated at a specific hour
        base_time = datetime.now(UTC).replace(hour=10, minute=0)
        episodes = [
            create_mock_episode(occurred_at=base_time + timedelta(minutes=i * 5))
            for i in range(10)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        patterns = await reflection.analyze_patterns(project_id, time_range)

        # May or may not find temporal patterns depending on thresholds;
        # just verify the analysis completes without error
        assert isinstance(patterns, list)

    async def test_empty_episodes_returns_empty(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return an empty list when there are no episodes."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=[])
        reflection._episodic = mock_episodic

        patterns = await reflection.analyze_patterns(project_id, time_range)

        assert patterns == []

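
# Success-factor tests correlate per-episode features (actions taken, duration,
# tokens used) against outcomes. The test config requires at least
# min_sample_size_for_factor=3 samples and a correlation of at least
# min_correlation_for_factor=0.2 before a factor is reported.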
class TestSuccessFactors:
    """Tests for success factor identification."""

    async def test_identify_action_factors(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should identify action-related success factors."""
        project_id = uuid4()

        # Create episodes where the 'validate' action correlates with success
        successful = [
            create_mock_episode(
                outcome=Outcome.SUCCESS,
                actions=[{"type": "validate"}, {"type": "commit"}],
            )
            for _ in range(5)
        ]
        failed = [
            create_mock_episode(
                outcome=Outcome.FAILURE,
                actions=[{"type": "commit"}],  # Missing validate
            )
            for _ in range(5)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=successful + failed)
        reflection._episodic = mock_episodic

        factors = await reflection.identify_success_factors(project_id)

        action_factors = [f for f in factors if f.factor_type == FactorType.ACTION]
        assert len(action_factors) >= 0  # May or may not find, based on thresholds

    async def test_identify_timing_factors(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should identify timing-related factors."""
        project_id = uuid4()

        # Successful tasks are faster
        successful = [
            create_mock_episode(outcome=Outcome.SUCCESS, duration_seconds=30.0)
            for _ in range(5)
        ]
        # Failed tasks take longer
        failed = [
            create_mock_episode(outcome=Outcome.FAILURE, duration_seconds=120.0)
            for _ in range(5)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=successful + failed)
        reflection._episodic = mock_episodic

        factors = await reflection.identify_success_factors(project_id)

        timing_factors = [f for f in factors if f.factor_type == FactorType.TIMING]
        assert len(timing_factors) >= 1

    async def test_identify_resource_factors(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should identify resource usage factors."""
        project_id = uuid4()

        # Successful tasks use fewer tokens
        successful = [
            create_mock_episode(outcome=Outcome.SUCCESS, tokens_used=100)
            for _ in range(5)
        ]
        # Failed tasks use more tokens
        failed = [
            create_mock_episode(outcome=Outcome.FAILURE, tokens_used=500)
            for _ in range(5)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=successful + failed)
        reflection._episodic = mock_episodic

        factors = await reflection.identify_success_factors(project_id)

        resource_factors = [f for f in factors if f.factor_type == FactorType.RESOURCE]
        assert len(resource_factors) >= 1

    async def test_filter_by_task_type(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should filter by task type when specified."""
        project_id = uuid4()

        episodes = [
            create_mock_episode(task_type="target_task", outcome=Outcome.SUCCESS)
            for _ in range(5)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_by_task_type = AsyncMock(return_value=episodes)
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        await reflection.identify_success_factors(project_id, task_type="target_task")

        mock_episodic.get_by_task_type.assert_called_once()

    async def test_insufficient_samples(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return empty when there are insufficient samples."""
        project_id = uuid4()

        # Only 2 episodes; the config requires a minimum of 3
        episodes = [create_mock_episode() for _ in range(2)]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        factors = await reflection.identify_success_factors(project_id)

        assert factors == []

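
# The anomaly tests below lean on a z-score style rule (a sketch of the
# assumed logic, not the service's actual code): with
# anomaly_std_dev_threshold=2.0, a sample is flagged when
#
#     abs(value - mean(baseline)) > 2.0 * stdev(baseline)
#
# so a 300 s episode against a flat 60 s baseline lands far past the cut-off.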
class TestAnomalyDetection:
    """Tests for anomaly detection."""

    async def test_detect_duration_anomaly(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect unusual duration anomalies."""
        project_id = uuid4()

        # Create a baseline with consistent durations
        now = datetime.now(UTC)
        baseline = [
            create_mock_episode(
                duration_seconds=60.0,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(2, 10)
        ]

        # Add a recent anomaly with a very long duration
        anomalous = create_mock_episode(
            duration_seconds=300.0,  # 5x longer
            occurred_at=now - timedelta(hours=1),
        )

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=[*baseline, anomalous])
        reflection._episodic = mock_episodic

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        duration_anomalies = [
            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_DURATION
        ]
        assert len(duration_anomalies) >= 1

    async def test_detect_unexpected_outcome_anomaly(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect unexpected outcome anomalies."""
        project_id = uuid4()

        now = datetime.now(UTC)
        # Create a baseline with a high success rate
        baseline = [
            create_mock_episode(
                task_type="reliable_task",
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(2, 10)
        ]

        # Add a recent failure for a usually successful task
        anomalous = create_mock_episode(
            task_type="reliable_task",
            outcome=Outcome.FAILURE,
            occurred_at=now - timedelta(hours=1),
        )

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=[*baseline, anomalous])
        reflection._episodic = mock_episodic

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        outcome_anomalies = [
            a for a in anomalies if a.anomaly_type == AnomalyType.UNEXPECTED_OUTCOME
        ]
        assert len(outcome_anomalies) >= 1

    async def test_detect_token_usage_anomaly(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect unusual token usage."""
        project_id = uuid4()

        now = datetime.now(UTC)
        # Create a baseline with consistent token usage
        baseline = [
            create_mock_episode(
                tokens_used=100,
                occurred_at=now - timedelta(days=i),
            )
            for i in range(2, 10)
        ]

        # Add a recent anomaly with very high token usage
        anomalous = create_mock_episode(
            tokens_used=1000,  # 10x higher
            occurred_at=now - timedelta(hours=1),
        )

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=[*baseline, anomalous])
        reflection._episodic = mock_episodic

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        token_anomalies = [
            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_TOKEN_USAGE
        ]
        assert len(token_anomalies) >= 1

    async def test_detect_failure_rate_spike(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should detect failure rate spikes."""
        project_id = uuid4()

        now = datetime.now(UTC)
        # Create a baseline with a low failure rate
        baseline = [
            create_mock_episode(
                outcome=Outcome.SUCCESS if i % 10 != 0 else Outcome.FAILURE,
                occurred_at=now - timedelta(days=i % 30),
            )
            for i in range(30)
        ]

        # Add recent failures (spike)
        recent_failures = [
            create_mock_episode(
                outcome=Outcome.FAILURE,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(1, 6)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=baseline + recent_failures)
        reflection._episodic = mock_episodic

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        # May or may not detect, based on thresholds;
        # just verify the analysis completes without error
        assert isinstance(anomalies, list)

    async def test_insufficient_baseline(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return empty when the baseline is insufficient."""
        project_id = uuid4()

        # Only 3 episodes; the config requires a minimum of 5
        episodes = [create_mock_episode() for _ in range(3)]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)

        assert anomalies == []

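
# Insight generation layers on top of the analyses above: patterns and
# anomalies are distilled into typed insights (WARNING, LEARNING, TREND) and,
# as the sorting test asserts, returned in descending priority order.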
class TestInsightGeneration:
    """Tests for insight generation."""

    async def test_generate_warning_insight_from_failure_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate a warning insight from failure patterns."""
        project_id = uuid4()

        # Create episodes with a recurring failure
        episodes = [
            create_mock_episode(task_type="failing_task", outcome=Outcome.FAILURE)
            for _ in range(8)
        ] + [
            create_mock_episode(task_type="failing_task", outcome=Outcome.SUCCESS)
            for _ in range(2)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        insights = await reflection.generate_insights(project_id)

        warning_insights = [
            i for i in insights if i.insight_type == InsightType.WARNING
        ]
        assert len(warning_insights) >= 1

    async def test_generate_learning_insight_from_success_pattern(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate a learning insight from success patterns."""
        project_id = uuid4()

        # Create episodes with a recurring success
        episodes = [
            create_mock_episode(task_type="good_task", outcome=Outcome.SUCCESS)
            for _ in range(9)
        ] + [
            create_mock_episode(task_type="good_task", outcome=Outcome.FAILURE)
            for _ in range(1)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        insights = await reflection.generate_insights(project_id)

        learning_insights = [
            i for i in insights if i.insight_type == InsightType.LEARNING
        ]
        assert len(learning_insights) >= 0  # May depend on thresholds

    async def test_generate_trend_insight(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate an overall trend insight."""
        project_id = uuid4()

        # Create enough episodes with timestamps in range
        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                outcome=Outcome.SUCCESS,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(10)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        insights = await reflection.generate_insights(project_id)

        trend_insights = [i for i in insights if i.insight_type == InsightType.TREND]
        assert len(trend_insights) >= 1

    async def test_insights_sorted_by_priority(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should sort insights by priority."""
        project_id = uuid4()

        episodes = [create_mock_episode(outcome=Outcome.SUCCESS) for _ in range(10)]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        insights = await reflection.generate_insights(project_id)

        if len(insights) >= 2:
            for i in range(len(insights) - 1):
                assert insights[i].priority >= insights[i + 1].priority

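
# reflect() is the umbrella entry point: it runs pattern, factor, anomaly, and
# insight analysis in one call and reports bookkeeping fields
# (episodes_analyzed, analysis_duration_seconds) alongside the results.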
class TestComprehensiveReflection:
    """Tests for the comprehensive reflect() method."""

    async def test_reflect_returns_all_components(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should return patterns, factors, anomalies, and insights."""
        project_id = uuid4()
        time_range = TimeRange.last_days(7)

        now = datetime.now(UTC)
        episodes = [
            create_mock_episode(
                task_type="test_task",
                outcome=Outcome.SUCCESS if i % 2 == 0 else Outcome.FAILURE,
                occurred_at=now - timedelta(hours=i),
            )
            for i in range(20)
        ]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        result = await reflection.reflect(project_id, time_range)

        assert result.patterns is not None
        assert result.factors is not None
        assert result.anomalies is not None
        assert result.insights is not None
        assert result.episodes_analyzed >= 0
        assert result.analysis_duration_seconds >= 0

    async def test_reflect_with_default_time_range(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should use the default 7-day time range."""
        project_id = uuid4()

        episodes = [create_mock_episode() for _ in range(5)]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        result = await reflection.reflect(project_id)

        assert 6.9 <= result.time_range.duration_days <= 7.1

    async def test_reflect_summary(
        self,
        reflection: MemoryReflection,
    ) -> None:
        """Should generate a meaningful summary."""
        project_id = uuid4()

        episodes = [create_mock_episode() for _ in range(10)]

        mock_episodic = MagicMock()
        mock_episodic.get_recent = AsyncMock(return_value=episodes)
        reflection._episodic = mock_episodic

        result = await reflection.reflect(project_id)

        summary = result.summary
        assert "Reflection Analysis" in summary
        assert "Episodes analyzed" in summary

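
# The factory behaviour these tests pin down amounts to the following sketch
# (an assumption for illustration, not the service's actual source):
#
#     async def get_memory_reflection(session) -> MemoryReflection:
#         return MemoryReflection(session=session)  # fresh instance per call
#
#     async def reset_memory_reflection() -> None:
#         pass  # no-op, retained for API compatibility
#
# Binding a new instance to the caller's session on every call is what removes
# the stale-session hazard the old singleton carried.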
class TestFactoryFunction:
    """Tests for factory function behavior.

    Note: The singleton pattern was removed to avoid stale database session
    bugs. Each call now creates a fresh instance, which is safer for
    request-scoped usage.
    """

    async def test_get_memory_reflection_creates_new_instance(
        self,
        mock_session: MagicMock,
    ) -> None:
        """Should create a new instance on each call (no singleton, for session safety)."""
        r1 = await get_memory_reflection(mock_session)
        r2 = await get_memory_reflection(mock_session)

        # Different instances, to avoid stale session issues
        assert r1 is not r2

    async def test_reset_is_no_op(
        self,
        mock_session: MagicMock,
    ) -> None:
        """Reset should be a no-op (kept for API compatibility)."""
        r1 = await get_memory_reflection(mock_session)
        await reset_memory_reflection()  # Should not raise
        r2 = await get_memory_reflection(mock_session)

        # Still creates new instances (reset is a no-op now)
        assert r1 is not r2