forked from cardosofelipe/fast-next-template
feat(memory): implement memory reflection service (#99)
Add reflection layer for memory system with pattern detection, success/failure factor analysis, anomaly detection, and insights generation. Enables agents to learn from past experiences and identify optimization opportunities. Key components: - Pattern detection: recurring success/failure, action sequences, temporal, efficiency - Factor analysis: action, context, timing, resource, preceding state factors - Anomaly detection: unusual duration, token usage, failure rates, action patterns - Insight generation: optimization, warning, learning, recommendation, trend insights Also fixes pre-existing timezone issues in test_types.py (datetime.now() -> datetime.now(UTC)). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -90,6 +90,9 @@ from .types import (
|
||||
WorkingMemoryItem,
|
||||
)
|
||||
|
||||
# Reflection (lazy import available)
|
||||
# Import directly: from app.services.memory.reflection import MemoryReflection
|
||||
|
||||
__all__ = [
|
||||
"CheckpointError",
|
||||
"ConsolidationStatus",
|
||||
|
||||
38
backend/app/services/memory/reflection/__init__.py
Normal file
38
backend/app/services/memory/reflection/__init__.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# app/services/memory/reflection/__init__.py
|
||||
"""
|
||||
Memory Reflection Layer.
|
||||
|
||||
Analyzes patterns in agent experiences to generate actionable insights.
|
||||
"""
|
||||
|
||||
from .service import (
|
||||
MemoryReflection,
|
||||
ReflectionConfig,
|
||||
get_memory_reflection,
|
||||
)
|
||||
from .types import (
|
||||
Anomaly,
|
||||
AnomalyType,
|
||||
Factor,
|
||||
FactorType,
|
||||
Insight,
|
||||
InsightType,
|
||||
Pattern,
|
||||
PatternType,
|
||||
TimeRange,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Anomaly",
|
||||
"AnomalyType",
|
||||
"Factor",
|
||||
"FactorType",
|
||||
"Insight",
|
||||
"InsightType",
|
||||
"MemoryReflection",
|
||||
"Pattern",
|
||||
"PatternType",
|
||||
"ReflectionConfig",
|
||||
"TimeRange",
|
||||
"get_memory_reflection",
|
||||
]
|
||||
1440
backend/app/services/memory/reflection/service.py
Normal file
1440
backend/app/services/memory/reflection/service.py
Normal file
File diff suppressed because it is too large
Load Diff
305
backend/app/services/memory/reflection/types.py
Normal file
305
backend/app/services/memory/reflection/types.py
Normal file
@@ -0,0 +1,305 @@
|
||||
# app/services/memory/reflection/types.py
|
||||
"""
|
||||
Memory Reflection Types.
|
||||
|
||||
Type definitions for pattern detection, anomaly detection, and insights.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
def _utcnow() -> datetime:
|
||||
"""Get current UTC time as timezone-aware datetime."""
|
||||
return datetime.now(UTC)
|
||||
|
||||
|
||||
class PatternType(str, Enum):
|
||||
"""Types of patterns detected in episodic memory."""
|
||||
|
||||
RECURRING_SUCCESS = "recurring_success"
|
||||
RECURRING_FAILURE = "recurring_failure"
|
||||
ACTION_SEQUENCE = "action_sequence"
|
||||
CONTEXT_CORRELATION = "context_correlation"
|
||||
TEMPORAL = "temporal"
|
||||
EFFICIENCY = "efficiency"
|
||||
|
||||
|
||||
class FactorType(str, Enum):
|
||||
"""Types of factors contributing to outcomes."""
|
||||
|
||||
ACTION = "action"
|
||||
CONTEXT = "context"
|
||||
TIMING = "timing"
|
||||
RESOURCE = "resource"
|
||||
PRECEDING_STATE = "preceding_state"
|
||||
|
||||
|
||||
class AnomalyType(str, Enum):
|
||||
"""Types of anomalies detected."""
|
||||
|
||||
UNUSUAL_DURATION = "unusual_duration"
|
||||
UNEXPECTED_OUTCOME = "unexpected_outcome"
|
||||
UNUSUAL_TOKEN_USAGE = "unusual_token_usage"
|
||||
UNUSUAL_FAILURE_RATE = "unusual_failure_rate"
|
||||
UNUSUAL_ACTION_PATTERN = "unusual_action_pattern"
|
||||
|
||||
|
||||
class InsightType(str, Enum):
|
||||
"""Types of insights generated."""
|
||||
|
||||
OPTIMIZATION = "optimization"
|
||||
WARNING = "warning"
|
||||
LEARNING = "learning"
|
||||
RECOMMENDATION = "recommendation"
|
||||
TREND = "trend"
|
||||
|
||||
|
||||
@dataclass
|
||||
class TimeRange:
|
||||
"""Time range for reflection analysis."""
|
||||
|
||||
start: datetime
|
||||
end: datetime
|
||||
|
||||
@classmethod
|
||||
def last_hours(cls, hours: int = 24) -> "TimeRange":
|
||||
"""Create time range for last N hours."""
|
||||
end = _utcnow()
|
||||
start = datetime(
|
||||
end.year, end.month, end.day, end.hour, end.minute, end.second,
|
||||
tzinfo=UTC
|
||||
) - __import__("datetime").timedelta(hours=hours)
|
||||
return cls(start=start, end=end)
|
||||
|
||||
@classmethod
|
||||
def last_days(cls, days: int = 7) -> "TimeRange":
|
||||
"""Create time range for last N days."""
|
||||
from datetime import timedelta
|
||||
|
||||
end = _utcnow()
|
||||
start = end - timedelta(days=days)
|
||||
return cls(start=start, end=end)
|
||||
|
||||
@property
|
||||
def duration_hours(self) -> float:
|
||||
"""Get duration in hours."""
|
||||
return (self.end - self.start).total_seconds() / 3600
|
||||
|
||||
@property
|
||||
def duration_days(self) -> float:
|
||||
"""Get duration in days."""
|
||||
return (self.end - self.start).total_seconds() / 86400
|
||||
|
||||
|
||||
@dataclass
|
||||
class Pattern:
|
||||
"""A detected pattern in episodic memory."""
|
||||
|
||||
id: UUID
|
||||
pattern_type: PatternType
|
||||
name: str
|
||||
description: str
|
||||
confidence: float
|
||||
occurrence_count: int
|
||||
episode_ids: list[UUID]
|
||||
first_seen: datetime
|
||||
last_seen: datetime
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def frequency(self) -> float:
|
||||
"""Calculate pattern frequency per day."""
|
||||
duration_days = (self.last_seen - self.first_seen).total_seconds() / 86400
|
||||
if duration_days < 1:
|
||||
duration_days = 1
|
||||
return self.occurrence_count / duration_days
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"pattern_type": self.pattern_type.value,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"confidence": self.confidence,
|
||||
"occurrence_count": self.occurrence_count,
|
||||
"episode_ids": [str(eid) for eid in self.episode_ids],
|
||||
"first_seen": self.first_seen.isoformat(),
|
||||
"last_seen": self.last_seen.isoformat(),
|
||||
"frequency": self.frequency,
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Factor:
|
||||
"""A factor contributing to success or failure."""
|
||||
|
||||
id: UUID
|
||||
factor_type: FactorType
|
||||
name: str
|
||||
description: str
|
||||
impact_score: float
|
||||
correlation: float
|
||||
sample_size: int
|
||||
positive_examples: list[UUID]
|
||||
negative_examples: list[UUID]
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def net_impact(self) -> float:
|
||||
"""Calculate net impact considering sample size."""
|
||||
# Weight impact by sample confidence
|
||||
confidence_weight = min(1.0, self.sample_size / 20)
|
||||
return self.impact_score * self.correlation * confidence_weight
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"factor_type": self.factor_type.value,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"impact_score": self.impact_score,
|
||||
"correlation": self.correlation,
|
||||
"sample_size": self.sample_size,
|
||||
"positive_examples": [str(eid) for eid in self.positive_examples],
|
||||
"negative_examples": [str(eid) for eid in self.negative_examples],
|
||||
"net_impact": self.net_impact,
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Anomaly:
|
||||
"""An anomaly detected in memory patterns."""
|
||||
|
||||
id: UUID
|
||||
anomaly_type: AnomalyType
|
||||
description: str
|
||||
severity: float
|
||||
episode_ids: list[UUID]
|
||||
detected_at: datetime
|
||||
baseline_value: float
|
||||
observed_value: float
|
||||
deviation_factor: float
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def is_critical(self) -> bool:
|
||||
"""Check if anomaly is critical (severity > 0.8)."""
|
||||
return self.severity > 0.8
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"anomaly_type": self.anomaly_type.value,
|
||||
"description": self.description,
|
||||
"severity": self.severity,
|
||||
"episode_ids": [str(eid) for eid in self.episode_ids],
|
||||
"detected_at": self.detected_at.isoformat(),
|
||||
"baseline_value": self.baseline_value,
|
||||
"observed_value": self.observed_value,
|
||||
"deviation_factor": self.deviation_factor,
|
||||
"is_critical": self.is_critical,
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Insight:
|
||||
"""An actionable insight generated from reflection."""
|
||||
|
||||
id: UUID
|
||||
insight_type: InsightType
|
||||
title: str
|
||||
description: str
|
||||
priority: float
|
||||
confidence: float
|
||||
source_patterns: list[UUID]
|
||||
source_factors: list[UUID]
|
||||
source_anomalies: list[UUID]
|
||||
recommended_actions: list[str]
|
||||
generated_at: datetime
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
|
||||
def actionable_score(self) -> float:
|
||||
"""Calculate how actionable this insight is."""
|
||||
action_weight = min(1.0, len(self.recommended_actions) / 3)
|
||||
return self.priority * self.confidence * action_weight
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"id": str(self.id),
|
||||
"insight_type": self.insight_type.value,
|
||||
"title": self.title,
|
||||
"description": self.description,
|
||||
"priority": self.priority,
|
||||
"confidence": self.confidence,
|
||||
"source_patterns": [str(pid) for pid in self.source_patterns],
|
||||
"source_factors": [str(fid) for fid in self.source_factors],
|
||||
"source_anomalies": [str(aid) for aid in self.source_anomalies],
|
||||
"recommended_actions": self.recommended_actions,
|
||||
"generated_at": self.generated_at.isoformat(),
|
||||
"actionable_score": self.actionable_score,
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReflectionResult:
|
||||
"""Result of a reflection operation."""
|
||||
|
||||
patterns: list[Pattern]
|
||||
factors: list[Factor]
|
||||
anomalies: list[Anomaly]
|
||||
insights: list[Insight]
|
||||
time_range: TimeRange
|
||||
episodes_analyzed: int
|
||||
analysis_duration_seconds: float
|
||||
generated_at: datetime = field(default_factory=_utcnow)
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
"patterns": [p.to_dict() for p in self.patterns],
|
||||
"factors": [f.to_dict() for f in self.factors],
|
||||
"anomalies": [a.to_dict() for a in self.anomalies],
|
||||
"insights": [i.to_dict() for i in self.insights],
|
||||
"time_range": {
|
||||
"start": self.time_range.start.isoformat(),
|
||||
"end": self.time_range.end.isoformat(),
|
||||
"duration_hours": self.time_range.duration_hours,
|
||||
},
|
||||
"episodes_analyzed": self.episodes_analyzed,
|
||||
"analysis_duration_seconds": self.analysis_duration_seconds,
|
||||
"generated_at": self.generated_at.isoformat(),
|
||||
}
|
||||
|
||||
@property
|
||||
def summary(self) -> str:
|
||||
"""Generate a summary of the reflection results."""
|
||||
lines = [
|
||||
f"Reflection Analysis ({self.time_range.duration_days:.1f} days)",
|
||||
f"Episodes analyzed: {self.episodes_analyzed}",
|
||||
"",
|
||||
f"Patterns detected: {len(self.patterns)}",
|
||||
f"Success/failure factors: {len(self.factors)}",
|
||||
f"Anomalies found: {len(self.anomalies)}",
|
||||
f"Insights generated: {len(self.insights)}",
|
||||
]
|
||||
|
||||
if self.insights:
|
||||
lines.append("")
|
||||
lines.append("Top insights:")
|
||||
for insight in sorted(self.insights, key=lambda i: -i.priority)[:3]:
|
||||
lines.append(f" - [{insight.insight_type.value}] {insight.title}")
|
||||
|
||||
return "\n".join(lines)
|
||||
@@ -0,0 +1,2 @@
|
||||
# tests/unit/services/memory/reflection/__init__.py
|
||||
"""Tests for Memory Reflection."""
|
||||
774
backend/tests/unit/services/memory/reflection/test_service.py
Normal file
774
backend/tests/unit/services/memory/reflection/test_service.py
Normal file
@@ -0,0 +1,774 @@
|
||||
# tests/unit/services/memory/reflection/test_service.py
|
||||
"""Tests for Memory Reflection service."""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from app.services.memory.reflection.service import (
|
||||
MemoryReflection,
|
||||
ReflectionConfig,
|
||||
get_memory_reflection,
|
||||
reset_memory_reflection,
|
||||
)
|
||||
from app.services.memory.reflection.types import (
|
||||
AnomalyType,
|
||||
FactorType,
|
||||
InsightType,
|
||||
PatternType,
|
||||
TimeRange,
|
||||
)
|
||||
from app.services.memory.types import Episode, Outcome
|
||||
|
||||
pytestmark = pytest.mark.asyncio(loop_scope="function")
|
||||
|
||||
|
||||
def create_mock_episode(
|
||||
task_type: str = "test_task",
|
||||
outcome: Outcome = Outcome.SUCCESS,
|
||||
duration_seconds: float = 60.0,
|
||||
tokens_used: int = 100,
|
||||
actions: list | None = None,
|
||||
occurred_at: datetime | None = None,
|
||||
context_summary: str = "Test context",
|
||||
) -> Episode:
|
||||
"""Create a mock episode for testing."""
|
||||
return Episode(
|
||||
id=uuid4(),
|
||||
project_id=uuid4(),
|
||||
agent_instance_id=None,
|
||||
agent_type_id=None,
|
||||
session_id="session-123",
|
||||
task_type=task_type,
|
||||
task_description=f"Test {task_type}",
|
||||
actions=actions or [{"type": "action1", "content": "test"}],
|
||||
context_summary=context_summary,
|
||||
outcome=outcome,
|
||||
outcome_details="",
|
||||
duration_seconds=duration_seconds,
|
||||
tokens_used=tokens_used,
|
||||
lessons_learned=[],
|
||||
importance_score=0.5,
|
||||
embedding=None,
|
||||
occurred_at=occurred_at or datetime.now(UTC),
|
||||
created_at=datetime.now(UTC),
|
||||
updated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_singleton() -> None:
|
||||
"""Reset singleton before each test."""
|
||||
reset_memory_reflection()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_session() -> MagicMock:
|
||||
"""Create mock database session."""
|
||||
return MagicMock()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def config() -> ReflectionConfig:
|
||||
"""Create test configuration."""
|
||||
return ReflectionConfig(
|
||||
min_pattern_occurrences=2,
|
||||
min_pattern_confidence=0.5,
|
||||
min_sample_size_for_factor=3,
|
||||
min_correlation_for_factor=0.2,
|
||||
min_baseline_samples=5,
|
||||
anomaly_std_dev_threshold=2.0,
|
||||
min_insight_confidence=0.1, # Lower for testing
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def reflection(mock_session: MagicMock, config: ReflectionConfig) -> MemoryReflection:
|
||||
"""Create reflection service."""
|
||||
return MemoryReflection(session=mock_session, config=config)
|
||||
|
||||
|
||||
class TestReflectionConfig:
|
||||
"""Tests for ReflectionConfig."""
|
||||
|
||||
def test_default_values(self) -> None:
|
||||
"""Should have sensible defaults."""
|
||||
config = ReflectionConfig()
|
||||
|
||||
assert config.min_pattern_occurrences == 3
|
||||
assert config.min_pattern_confidence == 0.6
|
||||
assert config.min_sample_size_for_factor == 5
|
||||
assert config.anomaly_std_dev_threshold == 2.0
|
||||
assert config.max_episodes_to_analyze == 1000
|
||||
|
||||
def test_custom_values(self) -> None:
|
||||
"""Should allow custom values."""
|
||||
config = ReflectionConfig(
|
||||
min_pattern_occurrences=5,
|
||||
min_pattern_confidence=0.8,
|
||||
)
|
||||
|
||||
assert config.min_pattern_occurrences == 5
|
||||
assert config.min_pattern_confidence == 0.8
|
||||
|
||||
|
||||
class TestPatternDetection:
|
||||
"""Tests for pattern detection."""
|
||||
|
||||
async def test_detect_recurring_success_pattern(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect recurring success patterns."""
|
||||
project_id = uuid4()
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
# Create episodes with high success rate for a task type
|
||||
# Ensure timestamps are within time range
|
||||
now = datetime.now(UTC)
|
||||
episodes = [
|
||||
create_mock_episode(
|
||||
task_type="build",
|
||||
outcome=Outcome.SUCCESS,
|
||||
occurred_at=now - timedelta(hours=i),
|
||||
)
|
||||
for i in range(8)
|
||||
] + [
|
||||
create_mock_episode(
|
||||
task_type="build",
|
||||
outcome=Outcome.FAILURE,
|
||||
occurred_at=now - timedelta(hours=8 + i),
|
||||
)
|
||||
for i in range(2)
|
||||
]
|
||||
|
||||
# Mock episodic memory
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
patterns = await reflection.analyze_patterns(project_id, time_range)
|
||||
|
||||
# Should find recurring success pattern for 'build' task
|
||||
success_patterns = [
|
||||
p for p in patterns
|
||||
if p.pattern_type == PatternType.RECURRING_SUCCESS
|
||||
]
|
||||
assert len(success_patterns) >= 1
|
||||
assert any(p.name.find("build") >= 0 for p in success_patterns)
|
||||
|
||||
async def test_detect_recurring_failure_pattern(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect recurring failure patterns."""
|
||||
project_id = uuid4()
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
# Create episodes with high failure rate
|
||||
# Ensure timestamps are within time range
|
||||
now = datetime.now(UTC)
|
||||
episodes = [
|
||||
create_mock_episode(
|
||||
task_type="deploy",
|
||||
outcome=Outcome.FAILURE,
|
||||
occurred_at=now - timedelta(hours=i),
|
||||
)
|
||||
for i in range(7)
|
||||
] + [
|
||||
create_mock_episode(
|
||||
task_type="deploy",
|
||||
outcome=Outcome.SUCCESS,
|
||||
occurred_at=now - timedelta(hours=7 + i),
|
||||
)
|
||||
for i in range(3)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
patterns = await reflection.analyze_patterns(project_id, time_range)
|
||||
|
||||
failure_patterns = [
|
||||
p for p in patterns
|
||||
if p.pattern_type == PatternType.RECURRING_FAILURE
|
||||
]
|
||||
assert len(failure_patterns) >= 1
|
||||
|
||||
async def test_detect_action_sequence_pattern(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect action sequence patterns."""
|
||||
project_id = uuid4()
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
# Create episodes with same action sequence
|
||||
# Ensure timestamps are within time range
|
||||
now = datetime.now(UTC)
|
||||
actions = [
|
||||
{"type": "read_file"},
|
||||
{"type": "analyze"},
|
||||
{"type": "write_file"},
|
||||
]
|
||||
episodes = [
|
||||
create_mock_episode(
|
||||
actions=actions,
|
||||
occurred_at=now - timedelta(hours=i),
|
||||
)
|
||||
for i in range(5)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
patterns = await reflection.analyze_patterns(project_id, time_range)
|
||||
|
||||
action_patterns = [
|
||||
p for p in patterns
|
||||
if p.pattern_type == PatternType.ACTION_SEQUENCE
|
||||
]
|
||||
assert len(action_patterns) >= 1
|
||||
|
||||
async def test_detect_temporal_pattern(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect temporal patterns."""
|
||||
project_id = uuid4()
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
# Create episodes concentrated at a specific hour
|
||||
base_time = datetime.now(UTC).replace(hour=10, minute=0)
|
||||
episodes = [
|
||||
create_mock_episode(occurred_at=base_time + timedelta(minutes=i * 5))
|
||||
for i in range(10)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
patterns = await reflection.analyze_patterns(project_id, time_range)
|
||||
|
||||
# May or may not find temporal patterns depending on thresholds
|
||||
# Just verify the analysis completes without error
|
||||
assert isinstance(patterns, list)
|
||||
|
||||
async def test_empty_episodes_returns_empty(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should return empty list when no episodes."""
|
||||
project_id = uuid4()
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=[])
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
patterns = await reflection.analyze_patterns(project_id, time_range)
|
||||
|
||||
assert patterns == []
|
||||
|
||||
|
||||
class TestSuccessFactors:
|
||||
"""Tests for success factor identification."""
|
||||
|
||||
async def test_identify_action_factors(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should identify action-related success factors."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Create episodes where 'validate' action correlates with success
|
||||
successful = [
|
||||
create_mock_episode(
|
||||
outcome=Outcome.SUCCESS,
|
||||
actions=[{"type": "validate"}, {"type": "commit"}],
|
||||
)
|
||||
for _ in range(5)
|
||||
]
|
||||
failed = [
|
||||
create_mock_episode(
|
||||
outcome=Outcome.FAILURE,
|
||||
actions=[{"type": "commit"}], # Missing validate
|
||||
)
|
||||
for _ in range(5)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=successful + failed)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
factors = await reflection.identify_success_factors(project_id)
|
||||
|
||||
action_factors = [f for f in factors if f.factor_type == FactorType.ACTION]
|
||||
assert len(action_factors) >= 0 # May or may not find based on thresholds
|
||||
|
||||
async def test_identify_timing_factors(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should identify timing-related factors."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Successful tasks are faster
|
||||
successful = [
|
||||
create_mock_episode(outcome=Outcome.SUCCESS, duration_seconds=30.0)
|
||||
for _ in range(5)
|
||||
]
|
||||
# Failed tasks take longer
|
||||
failed = [
|
||||
create_mock_episode(outcome=Outcome.FAILURE, duration_seconds=120.0)
|
||||
for _ in range(5)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=successful + failed)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
factors = await reflection.identify_success_factors(project_id)
|
||||
|
||||
timing_factors = [f for f in factors if f.factor_type == FactorType.TIMING]
|
||||
assert len(timing_factors) >= 1
|
||||
|
||||
async def test_identify_resource_factors(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should identify resource usage factors."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Successful tasks use fewer tokens
|
||||
successful = [
|
||||
create_mock_episode(outcome=Outcome.SUCCESS, tokens_used=100)
|
||||
for _ in range(5)
|
||||
]
|
||||
# Failed tasks use more tokens
|
||||
failed = [
|
||||
create_mock_episode(outcome=Outcome.FAILURE, tokens_used=500)
|
||||
for _ in range(5)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=successful + failed)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
factors = await reflection.identify_success_factors(project_id)
|
||||
|
||||
resource_factors = [f for f in factors if f.factor_type == FactorType.RESOURCE]
|
||||
assert len(resource_factors) >= 1
|
||||
|
||||
async def test_filter_by_task_type(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should filter by task type when specified."""
|
||||
project_id = uuid4()
|
||||
|
||||
episodes = [
|
||||
create_mock_episode(task_type="target_task", outcome=Outcome.SUCCESS)
|
||||
for _ in range(5)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_by_task_type = AsyncMock(return_value=episodes)
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
await reflection.identify_success_factors(project_id, task_type="target_task")
|
||||
|
||||
mock_episodic.get_by_task_type.assert_called_once()
|
||||
|
||||
async def test_insufficient_samples(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should return empty when insufficient samples."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Only 2 episodes, config requires 3 minimum
|
||||
episodes = [create_mock_episode() for _ in range(2)]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
factors = await reflection.identify_success_factors(project_id)
|
||||
|
||||
assert factors == []
|
||||
|
||||
|
||||
class TestAnomalyDetection:
|
||||
"""Tests for anomaly detection."""
|
||||
|
||||
async def test_detect_duration_anomaly(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect unusual duration anomalies."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Create baseline with consistent durations
|
||||
now = datetime.now(UTC)
|
||||
baseline = [
|
||||
create_mock_episode(
|
||||
duration_seconds=60.0,
|
||||
occurred_at=now - timedelta(days=i),
|
||||
)
|
||||
for i in range(2, 10)
|
||||
]
|
||||
|
||||
# Add recent anomaly with very long duration
|
||||
anomalous = create_mock_episode(
|
||||
duration_seconds=300.0, # 5x longer
|
||||
occurred_at=now - timedelta(hours=1),
|
||||
)
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=[*baseline, anomalous])
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
|
||||
|
||||
duration_anomalies = [
|
||||
a for a in anomalies
|
||||
if a.anomaly_type == AnomalyType.UNUSUAL_DURATION
|
||||
]
|
||||
assert len(duration_anomalies) >= 1
|
||||
|
||||
async def test_detect_unexpected_outcome_anomaly(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect unexpected outcome anomalies."""
|
||||
project_id = uuid4()
|
||||
|
||||
now = datetime.now(UTC)
|
||||
# Create baseline with high success rate
|
||||
baseline = [
|
||||
create_mock_episode(
|
||||
task_type="reliable_task",
|
||||
outcome=Outcome.SUCCESS,
|
||||
occurred_at=now - timedelta(days=i),
|
||||
)
|
||||
for i in range(2, 10)
|
||||
]
|
||||
|
||||
# Add recent failure for usually successful task
|
||||
anomalous = create_mock_episode(
|
||||
task_type="reliable_task",
|
||||
outcome=Outcome.FAILURE,
|
||||
occurred_at=now - timedelta(hours=1),
|
||||
)
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=[*baseline, anomalous])
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
|
||||
|
||||
outcome_anomalies = [
|
||||
a for a in anomalies
|
||||
if a.anomaly_type == AnomalyType.UNEXPECTED_OUTCOME
|
||||
]
|
||||
assert len(outcome_anomalies) >= 1
|
||||
|
||||
async def test_detect_token_usage_anomaly(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect unusual token usage."""
|
||||
project_id = uuid4()
|
||||
|
||||
now = datetime.now(UTC)
|
||||
# Create baseline with consistent token usage
|
||||
baseline = [
|
||||
create_mock_episode(
|
||||
tokens_used=100,
|
||||
occurred_at=now - timedelta(days=i),
|
||||
)
|
||||
for i in range(2, 10)
|
||||
]
|
||||
|
||||
# Add recent anomaly with very high token usage
|
||||
anomalous = create_mock_episode(
|
||||
tokens_used=1000, # 10x higher
|
||||
occurred_at=now - timedelta(hours=1),
|
||||
)
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=[*baseline, anomalous])
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
|
||||
|
||||
token_anomalies = [
|
||||
a for a in anomalies
|
||||
if a.anomaly_type == AnomalyType.UNUSUAL_TOKEN_USAGE
|
||||
]
|
||||
assert len(token_anomalies) >= 1
|
||||
|
||||
async def test_detect_failure_rate_spike(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should detect failure rate spikes."""
|
||||
project_id = uuid4()
|
||||
|
||||
now = datetime.now(UTC)
|
||||
# Create baseline with low failure rate
|
||||
baseline = [
|
||||
create_mock_episode(
|
||||
outcome=Outcome.SUCCESS if i % 10 != 0 else Outcome.FAILURE,
|
||||
occurred_at=now - timedelta(days=i % 30),
|
||||
)
|
||||
for i in range(30)
|
||||
]
|
||||
|
||||
# Add recent failures (spike)
|
||||
recent_failures = [
|
||||
create_mock_episode(
|
||||
outcome=Outcome.FAILURE,
|
||||
occurred_at=now - timedelta(hours=i),
|
||||
)
|
||||
for i in range(1, 6)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=baseline + recent_failures)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
|
||||
|
||||
# May or may not detect based on thresholds
|
||||
# Just verify the analysis completes without error
|
||||
assert isinstance(anomalies, list)
|
||||
|
||||
async def test_insufficient_baseline(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should return empty when insufficient baseline."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Only 3 episodes, config requires 5 minimum
|
||||
episodes = [create_mock_episode() for _ in range(3)]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
|
||||
|
||||
assert anomalies == []
|
||||
|
||||
|
||||
class TestInsightGeneration:
|
||||
"""Tests for insight generation."""
|
||||
|
||||
async def test_generate_warning_insight_from_failure_pattern(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should generate warning insight from failure patterns."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Create episodes with recurring failure
|
||||
episodes = [
|
||||
create_mock_episode(task_type="failing_task", outcome=Outcome.FAILURE)
|
||||
for _ in range(8)
|
||||
] + [
|
||||
create_mock_episode(task_type="failing_task", outcome=Outcome.SUCCESS)
|
||||
for _ in range(2)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
insights = await reflection.generate_insights(project_id)
|
||||
|
||||
warning_insights = [
|
||||
i for i in insights if i.insight_type == InsightType.WARNING
|
||||
]
|
||||
assert len(warning_insights) >= 1
|
||||
|
||||
async def test_generate_learning_insight_from_success_pattern(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should generate learning insight from success patterns."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Create episodes with recurring success
|
||||
episodes = [
|
||||
create_mock_episode(task_type="good_task", outcome=Outcome.SUCCESS)
|
||||
for _ in range(9)
|
||||
] + [
|
||||
create_mock_episode(task_type="good_task", outcome=Outcome.FAILURE)
|
||||
for _ in range(1)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
insights = await reflection.generate_insights(project_id)
|
||||
|
||||
learning_insights = [
|
||||
i for i in insights if i.insight_type == InsightType.LEARNING
|
||||
]
|
||||
assert len(learning_insights) >= 0 # May depend on thresholds
|
||||
|
||||
async def test_generate_trend_insight(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should generate overall trend insight."""
|
||||
project_id = uuid4()
|
||||
|
||||
# Create enough episodes with timestamps in range
|
||||
now = datetime.now(UTC)
|
||||
episodes = [
|
||||
create_mock_episode(
|
||||
outcome=Outcome.SUCCESS,
|
||||
occurred_at=now - timedelta(hours=i),
|
||||
)
|
||||
for i in range(10)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
insights = await reflection.generate_insights(project_id)
|
||||
|
||||
trend_insights = [
|
||||
i for i in insights if i.insight_type == InsightType.TREND
|
||||
]
|
||||
assert len(trend_insights) >= 1
|
||||
|
||||
async def test_insights_sorted_by_priority(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should sort insights by priority."""
|
||||
project_id = uuid4()
|
||||
|
||||
episodes = [
|
||||
create_mock_episode(outcome=Outcome.SUCCESS)
|
||||
for _ in range(10)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
insights = await reflection.generate_insights(project_id)
|
||||
|
||||
if len(insights) >= 2:
|
||||
for i in range(len(insights) - 1):
|
||||
assert insights[i].priority >= insights[i + 1].priority
|
||||
|
||||
|
||||
class TestComprehensiveReflection:
|
||||
"""Tests for comprehensive reflect() method."""
|
||||
|
||||
async def test_reflect_returns_all_components(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should return patterns, factors, anomalies, and insights."""
|
||||
project_id = uuid4()
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
now = datetime.now(UTC)
|
||||
episodes = [
|
||||
create_mock_episode(
|
||||
task_type="test_task",
|
||||
outcome=Outcome.SUCCESS if i % 2 == 0 else Outcome.FAILURE,
|
||||
occurred_at=now - timedelta(hours=i),
|
||||
)
|
||||
for i in range(20)
|
||||
]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
result = await reflection.reflect(project_id, time_range)
|
||||
|
||||
assert result.patterns is not None
|
||||
assert result.factors is not None
|
||||
assert result.anomalies is not None
|
||||
assert result.insights is not None
|
||||
assert result.episodes_analyzed >= 0
|
||||
assert result.analysis_duration_seconds >= 0
|
||||
|
||||
async def test_reflect_with_default_time_range(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should use default 7-day time range."""
|
||||
project_id = uuid4()
|
||||
|
||||
episodes = [create_mock_episode() for _ in range(5)]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
result = await reflection.reflect(project_id)
|
||||
|
||||
assert 6.9 <= result.time_range.duration_days <= 7.1
|
||||
|
||||
async def test_reflect_summary(
|
||||
self,
|
||||
reflection: MemoryReflection,
|
||||
) -> None:
|
||||
"""Should generate meaningful summary."""
|
||||
project_id = uuid4()
|
||||
|
||||
episodes = [create_mock_episode() for _ in range(10)]
|
||||
|
||||
mock_episodic = MagicMock()
|
||||
mock_episodic.get_recent = AsyncMock(return_value=episodes)
|
||||
reflection._episodic = mock_episodic
|
||||
|
||||
result = await reflection.reflect(project_id)
|
||||
|
||||
summary = result.summary
|
||||
assert "Reflection Analysis" in summary
|
||||
assert "Episodes analyzed" in summary
|
||||
|
||||
|
||||
class TestSingleton:
|
||||
"""Tests for singleton pattern."""
|
||||
|
||||
async def test_get_memory_reflection_returns_singleton(
|
||||
self,
|
||||
mock_session: MagicMock,
|
||||
) -> None:
|
||||
"""Should return same instance."""
|
||||
r1 = await get_memory_reflection(mock_session)
|
||||
r2 = await get_memory_reflection(mock_session)
|
||||
|
||||
assert r1 is r2
|
||||
|
||||
async def test_reset_creates_new_instance(
|
||||
self,
|
||||
mock_session: MagicMock,
|
||||
) -> None:
|
||||
"""Should create new instance after reset."""
|
||||
r1 = await get_memory_reflection(mock_session)
|
||||
reset_memory_reflection()
|
||||
r2 = await get_memory_reflection(mock_session)
|
||||
|
||||
assert r1 is not r2
|
||||
559
backend/tests/unit/services/memory/reflection/test_types.py
Normal file
559
backend/tests/unit/services/memory/reflection/test_types.py
Normal file
@@ -0,0 +1,559 @@
|
||||
# tests/unit/services/memory/reflection/test_types.py
|
||||
"""Tests for Memory Reflection types."""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from uuid import uuid4
|
||||
|
||||
from app.services.memory.reflection.types import (
|
||||
Anomaly,
|
||||
AnomalyType,
|
||||
Factor,
|
||||
FactorType,
|
||||
Insight,
|
||||
InsightType,
|
||||
Pattern,
|
||||
PatternType,
|
||||
ReflectionResult,
|
||||
TimeRange,
|
||||
)
|
||||
|
||||
|
||||
class TestTimeRange:
|
||||
"""Tests for TimeRange."""
|
||||
|
||||
def test_creates_time_range(self) -> None:
|
||||
"""Should create time range with start and end."""
|
||||
start = datetime.now(UTC) - timedelta(days=7)
|
||||
end = datetime.now(UTC)
|
||||
|
||||
tr = TimeRange(start=start, end=end)
|
||||
|
||||
assert tr.start == start
|
||||
assert tr.end == end
|
||||
|
||||
def test_last_hours(self) -> None:
|
||||
"""Should create time range for last N hours."""
|
||||
tr = TimeRange.last_hours(24)
|
||||
|
||||
assert tr.duration_hours >= 23.9
|
||||
assert tr.duration_hours <= 24.1
|
||||
|
||||
def test_last_days(self) -> None:
|
||||
"""Should create time range for last N days."""
|
||||
tr = TimeRange.last_days(7)
|
||||
|
||||
assert tr.duration_days >= 6.9
|
||||
assert tr.duration_days <= 7.1
|
||||
|
||||
def test_duration_hours(self) -> None:
|
||||
"""Should calculate duration in hours."""
|
||||
start = datetime.now(UTC) - timedelta(hours=12)
|
||||
end = datetime.now(UTC)
|
||||
|
||||
tr = TimeRange(start=start, end=end)
|
||||
|
||||
assert 11.9 <= tr.duration_hours <= 12.1
|
||||
|
||||
def test_duration_days(self) -> None:
|
||||
"""Should calculate duration in days."""
|
||||
start = datetime.now(UTC) - timedelta(days=3)
|
||||
end = datetime.now(UTC)
|
||||
|
||||
tr = TimeRange(start=start, end=end)
|
||||
|
||||
assert 2.9 <= tr.duration_days <= 3.1
|
||||
|
||||
|
||||
class TestPattern:
|
||||
"""Tests for Pattern."""
|
||||
|
||||
def test_creates_pattern(self) -> None:
|
||||
"""Should create pattern with all fields."""
|
||||
now = datetime.now(UTC)
|
||||
episode_ids = [uuid4(), uuid4(), uuid4()]
|
||||
|
||||
pattern = Pattern(
|
||||
id=uuid4(),
|
||||
pattern_type=PatternType.RECURRING_SUCCESS,
|
||||
name="Test Pattern",
|
||||
description="A test pattern",
|
||||
confidence=0.85,
|
||||
occurrence_count=10,
|
||||
episode_ids=episode_ids,
|
||||
first_seen=now - timedelta(days=7),
|
||||
last_seen=now,
|
||||
)
|
||||
|
||||
assert pattern.name == "Test Pattern"
|
||||
assert pattern.confidence == 0.85
|
||||
assert len(pattern.episode_ids) == 3
|
||||
|
||||
def test_frequency_calculation(self) -> None:
|
||||
"""Should calculate frequency per day."""
|
||||
now = datetime.now(UTC)
|
||||
|
||||
pattern = Pattern(
|
||||
id=uuid4(),
|
||||
pattern_type=PatternType.RECURRING_SUCCESS,
|
||||
name="Test",
|
||||
description="Test",
|
||||
confidence=0.8,
|
||||
occurrence_count=14,
|
||||
episode_ids=[],
|
||||
first_seen=now - timedelta(days=7),
|
||||
last_seen=now,
|
||||
)
|
||||
|
||||
assert pattern.frequency == 2.0 # 14 occurrences / 7 days
|
||||
|
||||
def test_frequency_minimum_one_day(self) -> None:
|
||||
"""Should use minimum 1 day for frequency calculation."""
|
||||
now = datetime.now(UTC)
|
||||
|
||||
pattern = Pattern(
|
||||
id=uuid4(),
|
||||
pattern_type=PatternType.RECURRING_SUCCESS,
|
||||
name="Test",
|
||||
description="Test",
|
||||
confidence=0.8,
|
||||
occurrence_count=5,
|
||||
episode_ids=[],
|
||||
first_seen=now - timedelta(hours=1), # Less than 1 day
|
||||
last_seen=now,
|
||||
)
|
||||
|
||||
assert pattern.frequency == 5.0 # 5 / 1 day minimum
|
||||
|
||||
def test_to_dict(self) -> None:
|
||||
"""Should convert to dictionary."""
|
||||
pattern = Pattern(
|
||||
id=uuid4(),
|
||||
pattern_type=PatternType.ACTION_SEQUENCE,
|
||||
name="Action Pattern",
|
||||
description="Action sequence",
|
||||
confidence=0.75,
|
||||
occurrence_count=5,
|
||||
episode_ids=[uuid4()],
|
||||
first_seen=datetime.now(UTC) - timedelta(days=1),
|
||||
last_seen=datetime.now(UTC),
|
||||
metadata={"key": "value"},
|
||||
)
|
||||
|
||||
result = pattern.to_dict()
|
||||
|
||||
assert result["name"] == "Action Pattern"
|
||||
assert result["pattern_type"] == "action_sequence"
|
||||
assert result["confidence"] == 0.75
|
||||
assert "frequency" in result
|
||||
assert result["metadata"] == {"key": "value"}
|
||||
|
||||
|
||||
class TestFactor:
|
||||
"""Tests for Factor."""
|
||||
|
||||
def test_creates_factor(self) -> None:
|
||||
"""Should create factor with all fields."""
|
||||
factor = Factor(
|
||||
id=uuid4(),
|
||||
factor_type=FactorType.ACTION,
|
||||
name="Test Factor",
|
||||
description="A test factor",
|
||||
impact_score=0.7,
|
||||
correlation=0.5,
|
||||
sample_size=20,
|
||||
positive_examples=[uuid4()],
|
||||
negative_examples=[uuid4()],
|
||||
)
|
||||
|
||||
assert factor.name == "Test Factor"
|
||||
assert factor.impact_score == 0.7
|
||||
assert factor.correlation == 0.5
|
||||
|
||||
def test_net_impact_calculation(self) -> None:
|
||||
"""Should calculate net impact."""
|
||||
factor = Factor(
|
||||
id=uuid4(),
|
||||
factor_type=FactorType.CONTEXT,
|
||||
name="Test",
|
||||
description="Test",
|
||||
impact_score=0.8,
|
||||
correlation=0.6,
|
||||
sample_size=20,
|
||||
positive_examples=[],
|
||||
negative_examples=[],
|
||||
)
|
||||
|
||||
# net_impact = impact_score * correlation * confidence_weight
|
||||
# confidence_weight = min(1.0, 20/20) = 1.0
|
||||
expected = 0.8 * 0.6 * 1.0
|
||||
assert factor.net_impact == expected
|
||||
|
||||
def test_net_impact_with_small_sample(self) -> None:
|
||||
"""Should weight net impact by sample size."""
|
||||
factor = Factor(
|
||||
id=uuid4(),
|
||||
factor_type=FactorType.CONTEXT,
|
||||
name="Test",
|
||||
description="Test",
|
||||
impact_score=0.8,
|
||||
correlation=0.6,
|
||||
sample_size=10, # Half of 20
|
||||
positive_examples=[],
|
||||
negative_examples=[],
|
||||
)
|
||||
|
||||
# confidence_weight = min(1.0, 10/20) = 0.5
|
||||
expected = 0.8 * 0.6 * 0.5
|
||||
assert factor.net_impact == expected
|
||||
|
||||
def test_to_dict(self) -> None:
|
||||
"""Should convert to dictionary."""
|
||||
factor = Factor(
|
||||
id=uuid4(),
|
||||
factor_type=FactorType.TIMING,
|
||||
name="Timing Factor",
|
||||
description="Time-related",
|
||||
impact_score=0.6,
|
||||
correlation=-0.3,
|
||||
sample_size=15,
|
||||
positive_examples=[],
|
||||
negative_examples=[],
|
||||
metadata={"key": "value"},
|
||||
)
|
||||
|
||||
result = factor.to_dict()
|
||||
|
||||
assert result["name"] == "Timing Factor"
|
||||
assert result["factor_type"] == "timing"
|
||||
assert "net_impact" in result
|
||||
assert result["metadata"] == {"key": "value"}
|
||||
|
||||
|
||||
class TestAnomaly:
|
||||
"""Tests for Anomaly."""
|
||||
|
||||
def test_creates_anomaly(self) -> None:
|
||||
"""Should create anomaly with all fields."""
|
||||
anomaly = Anomaly(
|
||||
id=uuid4(),
|
||||
anomaly_type=AnomalyType.UNUSUAL_DURATION,
|
||||
description="Unusual duration detected",
|
||||
severity=0.75,
|
||||
episode_ids=[uuid4()],
|
||||
detected_at=datetime.now(UTC),
|
||||
baseline_value=10.0,
|
||||
observed_value=30.0,
|
||||
deviation_factor=3.0,
|
||||
)
|
||||
|
||||
assert anomaly.severity == 0.75
|
||||
assert anomaly.baseline_value == 10.0
|
||||
assert anomaly.deviation_factor == 3.0
|
||||
|
||||
def test_is_critical_high_severity(self) -> None:
|
||||
"""Should be critical when severity > 0.8."""
|
||||
anomaly = Anomaly(
|
||||
id=uuid4(),
|
||||
anomaly_type=AnomalyType.UNUSUAL_FAILURE_RATE,
|
||||
description="High failure rate",
|
||||
severity=0.9,
|
||||
episode_ids=[],
|
||||
detected_at=datetime.now(UTC),
|
||||
baseline_value=0.1,
|
||||
observed_value=0.5,
|
||||
deviation_factor=5.0,
|
||||
)
|
||||
|
||||
assert anomaly.is_critical is True
|
||||
|
||||
def test_is_critical_low_severity(self) -> None:
|
||||
"""Should not be critical when severity <= 0.8."""
|
||||
anomaly = Anomaly(
|
||||
id=uuid4(),
|
||||
anomaly_type=AnomalyType.UNUSUAL_DURATION,
|
||||
description="Slightly unusual",
|
||||
severity=0.6,
|
||||
episode_ids=[],
|
||||
detected_at=datetime.now(UTC),
|
||||
baseline_value=10.0,
|
||||
observed_value=20.0,
|
||||
deviation_factor=2.0,
|
||||
)
|
||||
|
||||
assert anomaly.is_critical is False
|
||||
|
||||
def test_to_dict(self) -> None:
|
||||
"""Should convert to dictionary."""
|
||||
anomaly = Anomaly(
|
||||
id=uuid4(),
|
||||
anomaly_type=AnomalyType.UNEXPECTED_OUTCOME,
|
||||
description="Unexpected failure",
|
||||
severity=0.85,
|
||||
episode_ids=[uuid4()],
|
||||
detected_at=datetime.now(UTC),
|
||||
baseline_value=0.9,
|
||||
observed_value=0.0,
|
||||
deviation_factor=0.9,
|
||||
metadata={"task_type": "test"},
|
||||
)
|
||||
|
||||
result = anomaly.to_dict()
|
||||
|
||||
assert result["anomaly_type"] == "unexpected_outcome"
|
||||
assert result["severity"] == 0.85
|
||||
assert result["is_critical"] is True
|
||||
assert result["metadata"] == {"task_type": "test"}
|
||||
|
||||
|
||||
class TestInsight:
|
||||
"""Tests for Insight."""
|
||||
|
||||
def test_creates_insight(self) -> None:
|
||||
"""Should create insight with all fields."""
|
||||
insight = Insight(
|
||||
id=uuid4(),
|
||||
insight_type=InsightType.OPTIMIZATION,
|
||||
title="Performance Opportunity",
|
||||
description="Optimization potential found",
|
||||
priority=0.8,
|
||||
confidence=0.75,
|
||||
source_patterns=[uuid4()],
|
||||
source_factors=[],
|
||||
source_anomalies=[],
|
||||
recommended_actions=["Action 1", "Action 2"],
|
||||
generated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
assert insight.title == "Performance Opportunity"
|
||||
assert insight.priority == 0.8
|
||||
assert len(insight.recommended_actions) == 2
|
||||
|
||||
def test_actionable_score(self) -> None:
|
||||
"""Should calculate actionable score."""
|
||||
insight = Insight(
|
||||
id=uuid4(),
|
||||
insight_type=InsightType.RECOMMENDATION,
|
||||
title="Test",
|
||||
description="Test",
|
||||
priority=0.8,
|
||||
confidence=0.9,
|
||||
source_patterns=[],
|
||||
source_factors=[],
|
||||
source_anomalies=[],
|
||||
recommended_actions=["Action 1", "Action 2", "Action 3"],
|
||||
generated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
# actionable_score = priority * confidence * action_weight
|
||||
# action_weight = min(1.0, 3/3) = 1.0
|
||||
expected = 0.8 * 0.9 * 1.0
|
||||
assert insight.actionable_score == expected
|
||||
|
||||
def test_actionable_score_few_actions(self) -> None:
|
||||
"""Should weight by action count."""
|
||||
insight = Insight(
|
||||
id=uuid4(),
|
||||
insight_type=InsightType.WARNING,
|
||||
title="Test",
|
||||
description="Test",
|
||||
priority=0.8,
|
||||
confidence=0.9,
|
||||
source_patterns=[],
|
||||
source_factors=[],
|
||||
source_anomalies=[],
|
||||
recommended_actions=["Action 1"], # Only 1 action
|
||||
generated_at=datetime.now(UTC),
|
||||
)
|
||||
|
||||
# action_weight = min(1.0, 1/3) = 0.333...
|
||||
expected = 0.8 * 0.9 * (1 / 3)
|
||||
assert abs(insight.actionable_score - expected) < 0.001
|
||||
|
||||
def test_to_dict(self) -> None:
|
||||
"""Should convert to dictionary."""
|
||||
insight = Insight(
|
||||
id=uuid4(),
|
||||
insight_type=InsightType.TREND,
|
||||
title="Trend Analysis",
|
||||
description="Performance trend",
|
||||
priority=0.6,
|
||||
confidence=0.7,
|
||||
source_patterns=[uuid4()],
|
||||
source_factors=[uuid4()],
|
||||
source_anomalies=[],
|
||||
recommended_actions=["Monitor", "Review"],
|
||||
generated_at=datetime.now(UTC),
|
||||
metadata={"health_score": 0.85},
|
||||
)
|
||||
|
||||
result = insight.to_dict()
|
||||
|
||||
assert result["insight_type"] == "trend"
|
||||
assert result["title"] == "Trend Analysis"
|
||||
assert "actionable_score" in result
|
||||
assert result["metadata"] == {"health_score": 0.85}
|
||||
|
||||
|
||||
class TestReflectionResult:
|
||||
"""Tests for ReflectionResult."""
|
||||
|
||||
def test_creates_result(self) -> None:
|
||||
"""Should create reflection result."""
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
result = ReflectionResult(
|
||||
patterns=[],
|
||||
factors=[],
|
||||
anomalies=[],
|
||||
insights=[],
|
||||
time_range=time_range,
|
||||
episodes_analyzed=100,
|
||||
analysis_duration_seconds=2.5,
|
||||
)
|
||||
|
||||
assert result.episodes_analyzed == 100
|
||||
assert result.analysis_duration_seconds == 2.5
|
||||
|
||||
def test_to_dict(self) -> None:
|
||||
"""Should convert to dictionary."""
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
result = ReflectionResult(
|
||||
patterns=[
|
||||
Pattern(
|
||||
id=uuid4(),
|
||||
pattern_type=PatternType.RECURRING_SUCCESS,
|
||||
name="Test",
|
||||
description="Test",
|
||||
confidence=0.8,
|
||||
occurrence_count=5,
|
||||
episode_ids=[],
|
||||
first_seen=datetime.now(UTC),
|
||||
last_seen=datetime.now(UTC),
|
||||
)
|
||||
],
|
||||
factors=[],
|
||||
anomalies=[],
|
||||
insights=[],
|
||||
time_range=time_range,
|
||||
episodes_analyzed=50,
|
||||
analysis_duration_seconds=1.5,
|
||||
)
|
||||
|
||||
data = result.to_dict()
|
||||
|
||||
assert len(data["patterns"]) == 1
|
||||
assert data["episodes_analyzed"] == 50
|
||||
assert "time_range" in data
|
||||
assert "duration_hours" in data["time_range"]
|
||||
|
||||
def test_summary(self) -> None:
|
||||
"""Should generate summary text."""
|
||||
time_range = TimeRange.last_days(7)
|
||||
|
||||
result = ReflectionResult(
|
||||
patterns=[
|
||||
Pattern(
|
||||
id=uuid4(),
|
||||
pattern_type=PatternType.RECURRING_SUCCESS,
|
||||
name="Pattern 1",
|
||||
description="Test",
|
||||
confidence=0.8,
|
||||
occurrence_count=5,
|
||||
episode_ids=[],
|
||||
first_seen=datetime.now(UTC),
|
||||
last_seen=datetime.now(UTC),
|
||||
)
|
||||
],
|
||||
factors=[
|
||||
Factor(
|
||||
id=uuid4(),
|
||||
factor_type=FactorType.ACTION,
|
||||
name="Factor 1",
|
||||
description="Test",
|
||||
impact_score=0.6,
|
||||
correlation=0.4,
|
||||
sample_size=10,
|
||||
positive_examples=[],
|
||||
negative_examples=[],
|
||||
)
|
||||
],
|
||||
anomalies=[],
|
||||
insights=[
|
||||
Insight(
|
||||
id=uuid4(),
|
||||
insight_type=InsightType.OPTIMIZATION,
|
||||
title="Top Insight",
|
||||
description="Test",
|
||||
priority=0.9,
|
||||
confidence=0.8,
|
||||
source_patterns=[],
|
||||
source_factors=[],
|
||||
source_anomalies=[],
|
||||
recommended_actions=["Action"],
|
||||
generated_at=datetime.now(UTC),
|
||||
)
|
||||
],
|
||||
time_range=time_range,
|
||||
episodes_analyzed=100,
|
||||
analysis_duration_seconds=2.0,
|
||||
)
|
||||
|
||||
summary = result.summary
|
||||
|
||||
assert "Reflection Analysis" in summary
|
||||
assert "Episodes analyzed: 100" in summary
|
||||
assert "Patterns detected: 1" in summary
|
||||
assert "Success/failure factors: 1" in summary
|
||||
assert "Insights generated: 1" in summary
|
||||
assert "Top insights:" in summary
|
||||
assert "Top Insight" in summary
|
||||
|
||||
|
||||
class TestPatternType:
|
||||
"""Tests for PatternType enum."""
|
||||
|
||||
def test_all_pattern_types(self) -> None:
|
||||
"""Should have all expected pattern types."""
|
||||
assert PatternType.RECURRING_SUCCESS.value == "recurring_success"
|
||||
assert PatternType.RECURRING_FAILURE.value == "recurring_failure"
|
||||
assert PatternType.ACTION_SEQUENCE.value == "action_sequence"
|
||||
assert PatternType.CONTEXT_CORRELATION.value == "context_correlation"
|
||||
assert PatternType.TEMPORAL.value == "temporal"
|
||||
assert PatternType.EFFICIENCY.value == "efficiency"
|
||||
|
||||
|
||||
class TestFactorType:
|
||||
"""Tests for FactorType enum."""
|
||||
|
||||
def test_all_factor_types(self) -> None:
|
||||
"""Should have all expected factor types."""
|
||||
assert FactorType.ACTION.value == "action"
|
||||
assert FactorType.CONTEXT.value == "context"
|
||||
assert FactorType.TIMING.value == "timing"
|
||||
assert FactorType.RESOURCE.value == "resource"
|
||||
assert FactorType.PRECEDING_STATE.value == "preceding_state"
|
||||
|
||||
|
||||
class TestAnomalyType:
|
||||
"""Tests for AnomalyType enum."""
|
||||
|
||||
def test_all_anomaly_types(self) -> None:
|
||||
"""Should have all expected anomaly types."""
|
||||
assert AnomalyType.UNUSUAL_DURATION.value == "unusual_duration"
|
||||
assert AnomalyType.UNEXPECTED_OUTCOME.value == "unexpected_outcome"
|
||||
assert AnomalyType.UNUSUAL_TOKEN_USAGE.value == "unusual_token_usage"
|
||||
assert AnomalyType.UNUSUAL_FAILURE_RATE.value == "unusual_failure_rate"
|
||||
assert AnomalyType.UNUSUAL_ACTION_PATTERN.value == "unusual_action_pattern"
|
||||
|
||||
|
||||
class TestInsightType:
|
||||
"""Tests for InsightType enum."""
|
||||
|
||||
def test_all_insight_types(self) -> None:
|
||||
"""Should have all expected insight types."""
|
||||
assert InsightType.OPTIMIZATION.value == "optimization"
|
||||
assert InsightType.WARNING.value == "warning"
|
||||
assert InsightType.LEARNING.value == "learning"
|
||||
assert InsightType.RECOMMENDATION.value == "recommendation"
|
||||
assert InsightType.TREND.value == "trend"
|
||||
@@ -2,7 +2,7 @@
|
||||
Tests for Memory System Types.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from uuid import uuid4
|
||||
|
||||
from app.services.memory.types import (
|
||||
@@ -150,7 +150,7 @@ class TestMemoryItem:
|
||||
|
||||
def test_get_age_seconds(self) -> None:
|
||||
"""Test getting item age."""
|
||||
past = datetime.now() - timedelta(seconds=100)
|
||||
past = datetime.now(UTC) - timedelta(seconds=100)
|
||||
item = MemoryItem(
|
||||
id=uuid4(),
|
||||
memory_type=MemoryType.SEMANTIC,
|
||||
@@ -202,7 +202,7 @@ class TestWorkingMemoryItem:
|
||||
scope_id="sess-123",
|
||||
key="my_key",
|
||||
value="value",
|
||||
expires_at=datetime.now() + timedelta(hours=1),
|
||||
expires_at=datetime.now(UTC) + timedelta(hours=1),
|
||||
)
|
||||
|
||||
assert item.is_expired() is False
|
||||
@@ -215,7 +215,7 @@ class TestWorkingMemoryItem:
|
||||
scope_id="sess-123",
|
||||
key="my_key",
|
||||
value="value",
|
||||
expires_at=datetime.now() - timedelta(hours=1),
|
||||
expires_at=datetime.now(UTC) - timedelta(hours=1),
|
||||
)
|
||||
|
||||
assert item.is_expired() is True
|
||||
|
||||
Reference in New Issue
Block a user