style(memory): apply ruff formatting and linting fixes

Auto-fixed linting errors and formatting issues:
- Removed unused imports (F401): pytest, Any, AnalysisType, MemoryType, OutcomeType
- Removed unused variable (F841): hooks variable in test
- Applied consistent formatting across memory service and test files
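
For reference, F401 flags an import that is never used and F841 a local variable that is assigned but never read. A minimal sketch of the kind of code these rules report (hypothetical example, not taken from the changed files):

    # lint_demo.py - hypothetical snippet; ruff flags both rules below
    import json  # F401: `json` is imported but never used


    def register_hooks() -> None:
        # F841: `hooks` is assigned but never read afterwards
        hooks = ["on_spawn", "on_pause", "on_resume"]

`ruff check --fix` removes the unused import (the F841 fix is gated behind `--unsafe-fixes` in recent ruff versions), and `ruff format` applies the line-length rewrapping visible in the hunks below.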

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 14:07:48 +01:00
parent e3fe0439fd
commit cf6291ac8e
17 changed files with 236 additions and 185 deletions

View File

@@ -344,7 +344,12 @@ class BudgetAllocator:
             Rebalanced budget
         """
         if prioritize is None:
-            prioritize = [ContextType.KNOWLEDGE, ContextType.MEMORY, ContextType.TASK, ContextType.SYSTEM]
+            prioritize = [
+                ContextType.KNOWLEDGE,
+                ContextType.MEMORY,
+                ContextType.TASK,
+                ContextType.SYSTEM,
+            ]
 
         # Calculate unused tokens per type
         unused: dict[str, int] = {}

View File

@@ -50,7 +50,9 @@ class CacheStats:
             "embedding_cache": self.embedding_cache,
             "retrieval_cache": self.retrieval_cache,
             "overall_hit_rate": self.overall_hit_rate,
-            "last_cleanup": self.last_cleanup.isoformat() if self.last_cleanup else None,
+            "last_cleanup": self.last_cleanup.isoformat()
+            if self.last_cleanup
+            else None,
             "cleanup_count": self.cleanup_count,
         }
@@ -104,7 +106,8 @@ class CacheManager:
         else:
             self._embedding_cache = create_embedding_cache(
                 max_size=self._settings.cache_max_items,
-                default_ttl_seconds=self._settings.cache_ttl_seconds * 12,  # 1hr for embeddings
+                default_ttl_seconds=self._settings.cache_ttl_seconds
+                * 12,  # 1hr for embeddings
                 redis=redis,
             )
@@ -271,7 +274,9 @@ class CacheManager:
         # Invalidate retrieval cache
         if self._retrieval_cache:
-            uuid_id = UUID(str(memory_id)) if not isinstance(memory_id, UUID) else memory_id
+            uuid_id = (
+                UUID(str(memory_id)) if not isinstance(memory_id, UUID) else memory_id
+            )
             count += self._retrieval_cache.invalidate_by_memory(uuid_id)
 
         logger.debug(f"Invalidated {count} cache entries for {memory_type}:{memory_id}")

View File

@@ -405,9 +405,7 @@ class EmbeddingCache:
         count = 0
         with self._lock:
-            keys_to_remove = [
-                k for k, v in self._cache.items() if v.model == model
-            ]
+            keys_to_remove = [k for k, v in self._cache.items() if v.model == model]
             for key in keys_to_remove:
                 del self._cache[key]
                 count += 1
@@ -454,9 +452,7 @@ class EmbeddingCache:
             Number of entries removed
         """
         with self._lock:
-            keys_to_remove = [
-                k for k, v in self._cache.items() if v.is_expired()
-            ]
+            keys_to_remove = [k for k, v in self._cache.items() if v.is_expired()]
             for key in keys_to_remove:
                 del self._cache[key]
                 self._stats.expirations += 1

View File

@@ -384,9 +384,7 @@ class HotMemoryCache[T]:
             Number of entries removed
         """
         with self._lock:
-            keys_to_remove = [
-                k for k, v in self._cache.items() if v.is_expired()
-            ]
+            keys_to_remove = [k for k, v in self._cache.items() if v.is_expired()]
             for key in keys_to_remove:
                 del self._cache[key]
                 self._stats.expirations += 1

View File

@@ -321,10 +321,7 @@ class MemoryContextSource:
             min_confidence=min_relevance,
         )
-        return [
-            MemoryContext.from_semantic_memory(fact, query=query)
-            for fact in facts
-        ]
+        return [MemoryContext.from_semantic_memory(fact, query=query) for fact in facts]
 
     async def _fetch_procedural(
         self,

View File

@@ -287,7 +287,9 @@ class AgentLifecycleManager:
         # Get all current state
         all_keys = await working.list_keys()
         # Filter out checkpoint keys
-        state_keys = [k for k in all_keys if not k.startswith(self.CHECKPOINT_PREFIX)]
+        state_keys = [
+            k for k in all_keys if not k.startswith(self.CHECKPOINT_PREFIX)
+        ]
 
         state: dict[str, Any] = {}
         for key in state_keys:
@@ -483,7 +485,9 @@ class AgentLifecycleManager:
         # Gather session state for consolidation
         all_keys = await working.list_keys()
-        state_keys = [k for k in all_keys if not k.startswith(self.CHECKPOINT_PREFIX)]
+        state_keys = [
+            k for k in all_keys if not k.startswith(self.CHECKPOINT_PREFIX)
+        ]
 
         session_state: dict[str, Any] = {}
         for key in state_keys:
@@ -600,11 +604,13 @@ class AgentLifecycleManager:
                 checkpoint_id = key[len(self.CHECKPOINT_PREFIX) :]
                 checkpoint = await working.get(key)
                 if checkpoint:
-                    checkpoints.append({
-                        "checkpoint_id": checkpoint_id,
-                        "timestamp": checkpoint.get("timestamp"),
-                        "keys_count": checkpoint.get("keys_count", 0),
-                    })
+                    checkpoints.append(
+                        {
+                            "checkpoint_id": checkpoint_id,
+                            "timestamp": checkpoint.get("timestamp"),
+                            "keys_count": checkpoint.get("keys_count", 0),
+                        }
+                    )
 
         # Sort by timestamp (newest first)
         checkpoints.sort(

View File

@@ -414,12 +414,14 @@ class MemoryToolService:
                 if args.query.lower() in key.lower():
                     value = await working.get(key)
                     if value is not None:
-                        results.append({
-                            "type": "working",
-                            "key": key,
-                            "content": str(value),
-                            "relevance": 1.0,
-                        })
+                        results.append(
+                            {
+                                "type": "working",
+                                "key": key,
+                                "content": str(value),
+                                "relevance": 1.0,
+                            }
+                        )
 
         elif memory_type == MemoryType.EPISODIC:
             episodic = await self._get_episodic()
@@ -430,14 +432,18 @@ class MemoryToolService:
                 agent_instance_id=context.agent_instance_id,
             )
             for episode in episodes:
-                results.append({
-                    "type": "episodic",
-                    "id": str(episode.id),
-                    "summary": episode.task_description,
-                    "outcome": episode.outcome.value if episode.outcome else None,
-                    "occurred_at": episode.occurred_at.isoformat(),
-                    "relevance": episode.importance_score,
-                })
+                results.append(
+                    {
+                        "type": "episodic",
+                        "id": str(episode.id),
+                        "summary": episode.task_description,
+                        "outcome": episode.outcome.value
+                        if episode.outcome
+                        else None,
+                        "occurred_at": episode.occurred_at.isoformat(),
+                        "relevance": episode.importance_score,
+                    }
+                )
 
         elif memory_type == MemoryType.SEMANTIC:
             semantic = await self._get_semantic()
@@ -448,7 +454,8 @@ class MemoryToolService:
                 min_confidence=args.min_relevance,
             )
             for fact in facts:
-                results.append({
-                    "type": "semantic",
-                    "id": str(fact.id),
-                    "subject": fact.subject,
+                results.append(
+                    {
+                        "type": "semantic",
+                        "id": str(fact.id),
+                        "subject": fact.subject,
@@ -456,7 +463,8 @@ class MemoryToolService:
-                    "object": fact.object,
-                    "confidence": fact.confidence,
-                    "relevance": fact.confidence,
-                })
+                        "object": fact.object,
+                        "confidence": fact.confidence,
+                        "relevance": fact.confidence,
+                    }
+                )
 
         elif memory_type == MemoryType.PROCEDURAL:
             procedural = await self._get_procedural()
@@ -467,7 +475,8 @@ class MemoryToolService:
                 limit=args.limit,
             )
             for proc in procedures:
-                results.append({
-                    "type": "procedural",
-                    "id": str(proc.id),
-                    "name": proc.name,
+                results.append(
+                    {
+                        "type": "procedural",
+                        "id": str(proc.id),
+                        "name": proc.name,
@@ -475,7 +484,8 @@ class MemoryToolService:
-                    "success_rate": proc.success_rate,
-                    "steps_count": len(proc.steps) if proc.steps else 0,
-                    "relevance": proc.success_rate,
-                })
+                        "success_rate": proc.success_rate,
+                        "steps_count": len(proc.steps) if proc.steps else 0,
+                        "relevance": proc.success_rate,
+                    }
+                )
 
         # Sort by relevance and limit
         results.sort(key=lambda x: x.get("relevance", 0), reverse=True)
@@ -601,7 +611,11 @@ class MemoryToolService:
             if ep.task_type:
                 task_types[ep.task_type] = task_types.get(ep.task_type, 0) + 1
             if ep.outcome:
-                outcome_val = ep.outcome.value if hasattr(ep.outcome, "value") else str(ep.outcome)
+                outcome_val = (
+                    ep.outcome.value
+                    if hasattr(ep.outcome, "value")
+                    else str(ep.outcome)
+                )
                 outcomes[outcome_val] = outcomes.get(outcome_val, 0) + 1
 
         # Sort by frequency
@@ -613,11 +627,13 @@ class MemoryToolService:
         examples = []
         if args.include_examples:
             for ep in episodes[: min(3, args.max_items)]:
-                examples.append({
-                    "summary": ep.task_description,
-                    "task_type": ep.task_type,
-                    "outcome": ep.outcome.value if ep.outcome else None,
-                })
+                examples.append(
+                    {
+                        "summary": ep.task_description,
+                        "task_type": ep.task_type,
+                        "outcome": ep.outcome.value if ep.outcome else None,
+                    }
+                )
 
         return {
             "analysis_type": "recent_patterns",
@@ -661,11 +677,13 @@ class MemoryToolService:
         examples = []
         if args.include_examples:
             for ep in successful[: min(3, args.max_items)]:
-                examples.append({
-                    "summary": ep.task_description,
-                    "task_type": ep.task_type,
-                    "lessons": ep.lessons_learned,
-                })
+                examples.append(
+                    {
+                        "summary": ep.task_description,
+                        "task_type": ep.task_type,
+                        "lessons": ep.lessons_learned,
+                    }
+                )
 
         return {
             "analysis_type": "success_factors",
@@ -694,9 +712,7 @@ class MemoryToolService:
                 failure_by_task[task].append(ep)
 
         # Most common failure types
-        failure_counts = {
-            task: len(eps) for task, eps in failure_by_task.items()
-        }
+        failure_counts = {task: len(eps) for task, eps in failure_by_task.items()}
         top_failures = sorted(failure_counts.items(), key=lambda x: x[1], reverse=True)[
             : args.max_items
         ]
@@ -704,12 +720,14 @@ class MemoryToolService:
         examples = []
         if args.include_examples:
             for ep in failed[: min(3, args.max_items)]:
-                examples.append({
-                    "summary": ep.task_description,
-                    "task_type": ep.task_type,
-                    "lessons": ep.lessons_learned,
-                    "error": ep.outcome_details,
-                })
+                examples.append(
+                    {
+                        "summary": ep.task_description,
+                        "task_type": ep.task_type,
+                        "lessons": ep.lessons_learned,
+                        "error": ep.outcome_details,
+                    }
+                )
 
         return {
             "analysis_type": "failure_patterns",
@@ -794,15 +812,21 @@ class MemoryToolService:
         insights = []
         if top_tasks:
-            insights.append(f"Most common task type: {top_tasks[0][0]} ({top_tasks[0][1]} occurrences)")
+            insights.append(
+                f"Most common task type: {top_tasks[0][0]} ({top_tasks[0][1]} occurrences)"
+            )
 
         total = sum(outcome_dist.values())
         if total > 0:
             success_rate = outcome_dist.get("success", 0) / total
             if success_rate > 0.8:
-                insights.append("High success rate observed - current approach is working well")
+                insights.append(
+                    "High success rate observed - current approach is working well"
+                )
             elif success_rate < 0.5:
-                insights.append("Success rate below 50% - consider reviewing procedures")
+                insights.append(
+                    "Success rate below 50% - consider reviewing procedures"
+                )
 
         return insights
@@ -839,9 +863,13 @@ class MemoryToolService:
         if top_failures:
             worst_task, count = top_failures[0]
-            tips.append(f"'{worst_task}' has most failures ({count}) - needs procedure review")
+            tips.append(
+                f"'{worst_task}' has most failures ({count}) - needs procedure review"
+            )
 
-        tips.append("Review lessons_learned from past failures before attempting similar tasks")
+        tips.append(
+            "Review lessons_learned from past failures before attempting similar tasks"
+        )
 
         return tips
@@ -912,7 +940,11 @@ class MemoryToolService:
         outcomes = {"success": 0, "failure": 0, "partial": 0, "abandoned": 0}
         for ep in recent_episodes:
             if ep.outcome:
-                key = ep.outcome.value if hasattr(ep.outcome, "value") else str(ep.outcome)
+                key = (
+                    ep.outcome.value
+                    if hasattr(ep.outcome, "value")
+                    else str(ep.outcome)
+                )
                 if key in outcomes:
                     outcomes[key] += 1
@@ -942,7 +974,8 @@ class MemoryToolService:
         # Filter by minimum success rate if specified
         procedures = [
-            p for p in all_procedures
+            p
+            for p in all_procedures
             if args.min_success_rate is None or p.success_rate >= args.min_success_rate
         ][: args.limit]

View File

@@ -441,9 +441,7 @@ class MemoryMetrics:
         # Get hits/misses by cache type
         for labels_str, hits in self._counters["memory_cache_hits_total"].items():
-            cache_type = self._parse_labels(labels_str).get(
-                "cache_type", "unknown"
-            )
+            cache_type = self._parse_labels(labels_str).get("cache_type", "unknown")
             if cache_type not in stats:
                 stats[cache_type] = {"hits": 0, "misses": 0}
             stats[cache_type]["hits"] = hits
@@ -451,9 +449,7 @@ class MemoryMetrics:
         for labels_str, misses in self._counters[
             "memory_cache_misses_total"
         ].items():
-            cache_type = self._parse_labels(labels_str).get(
-                "cache_type", "unknown"
-            )
+            cache_type = self._parse_labels(labels_str).get("cache_type", "unknown")
             if cache_type not in stats:
                 stats[cache_type] = {"hits": 0, "misses": 0}
             stats[cache_type]["misses"] = misses

View File

@@ -149,8 +149,7 @@ class MemoryReflection:
         # Filter to time range
         episodes = [
-            e for e in episodes
-            if time_range.start <= e.occurred_at <= time_range.end
+            e for e in episodes if time_range.start <= e.occurred_at <= time_range.end
         ]
 
         if not episodes:
@@ -313,7 +312,9 @@ class MemoryReflection:
                         f"Task type '{task_type}': {success_rate:.0%} success rate, "
                         f"avg {avg_duration:.1f}s duration, {avg_tokens:.0f} tokens"
                     ),
-                    confidence=min(1.0, stats["total"] / 10),  # Higher sample = higher confidence
+                    confidence=min(
+                        1.0, stats["total"] / 10
+                    ),  # Higher sample = higher confidence
                     occurrence_count=stats["total"],
                     episode_ids=[e.id for e in stats["episodes"]],
                     first_seen=min(e.occurred_at for e in stats["episodes"]),
@@ -397,7 +398,9 @@ class MemoryReflection:
         failed = [e for e in episodes if e.outcome == Outcome.FAILURE]
 
         if len(successful) >= 3 and len(failed) >= 3:
-            avg_success_duration = statistics.mean(e.duration_seconds for e in successful)
+            avg_success_duration = statistics.mean(
+                e.duration_seconds for e in successful
+            )
             avg_failure_duration = statistics.mean(e.duration_seconds for e in failed)
 
             if avg_failure_duration > avg_success_duration * 1.5:
@@ -427,9 +430,15 @@ class MemoryReflection:
         # Analyze token efficiency
         if len(successful) >= 3:
             avg_tokens = statistics.mean(e.tokens_used for e in successful)
-            std_tokens = statistics.stdev(e.tokens_used for e in successful) if len(successful) > 1 else 0
-            efficient = [e for e in successful if e.tokens_used < avg_tokens - std_tokens]
+            std_tokens = (
+                statistics.stdev(e.tokens_used for e in successful)
+                if len(successful) > 1
+                else 0
+            )
+            efficient = [
+                e for e in successful if e.tokens_used < avg_tokens - std_tokens
+            ]
 
             if len(efficient) >= self._config.min_pattern_occurrences:
                 patterns.append(
                     Pattern(
@@ -508,8 +517,7 @@ class MemoryReflection:
         # Filter to time range
         episodes = [
-            e for e in episodes
-            if time_range.start <= e.occurred_at <= time_range.end
+            e for e in episodes if time_range.start <= e.occurred_at <= time_range.end
         ]
 
         if len(episodes) < self._config.min_sample_size_for_factor:
@@ -652,9 +660,7 @@ class MemoryReflection:
             avg_success_duration = statistics.mean(
                 e.duration_seconds for e in successful
             )
-            avg_failure_duration = statistics.mean(
-                e.duration_seconds for e in failed
-            )
+            avg_failure_duration = statistics.mean(e.duration_seconds for e in failed)
 
             if avg_success_duration > 0:
                 duration_ratio = avg_failure_duration / avg_success_duration
@@ -837,7 +843,9 @@ class MemoryReflection:
         baseline_durations = [e.duration_seconds for e in baseline]
         baseline_mean = statistics.mean(baseline_durations)
-        baseline_std = statistics.stdev(baseline_durations) if len(baseline_durations) > 1 else 0
+        baseline_std = (
+            statistics.stdev(baseline_durations) if len(baseline_durations) > 1 else 0
+        )
 
         if baseline_std == 0:
             return anomalies
@@ -997,7 +1005,10 @@ class MemoryReflection:
         ) / len(recent)
 
         # Detect significant failure rate increase
-        if recent_failure_rate > baseline_failure_rate * 1.5 and recent_failure_rate > 0.3:
+        if (
+            recent_failure_rate > baseline_failure_rate * 1.5
+            and recent_failure_rate > 0.3
+        ):
             rate_increase = recent_failure_rate / max(baseline_failure_rate, 0.01)
 
             anomalies.append(
@@ -1074,14 +1085,11 @@ class MemoryReflection:
         insights.extend(self._insights_from_anomalies(anomalies))
 
         # Generate cross-cutting insights
-        insights.extend(
-            self._generate_cross_insights(patterns, factors, anomalies)
-        )
+        insights.extend(self._generate_cross_insights(patterns, factors, anomalies))
 
         # Filter by confidence and sort by priority
         insights = [
-            i for i in insights
-            if i.confidence >= self._config.min_insight_confidence
+            i for i in insights if i.confidence >= self._config.min_insight_confidence
         ]
         insights.sort(key=lambda i: -i.priority)
@@ -1182,9 +1190,7 @@ class MemoryReflection:
                     source_patterns=[],
                     source_factors=[f.id for f in top_positive],
                     source_anomalies=[],
-                    recommended_actions=[
-                        f"Reinforce: {f.name}" for f in top_positive
-                    ],
+                    recommended_actions=[f"Reinforce: {f.name}" for f in top_positive],
                     generated_at=_utcnow(),
                     metadata={
                         "factors": [f.to_dict() for f in top_positive],
@@ -1200,17 +1206,16 @@ class MemoryReflection:
                     insight_type=InsightType.WARNING,
                     title="Factors correlating with failure",
                     description=(
-                        "Risky factors: "
-                        + ", ".join(f.name for f in top_negative)
+                        "Risky factors: " + ", ".join(f.name for f in top_negative)
                     ),
                     priority=0.75,
-                    confidence=statistics.mean(abs(f.correlation) for f in top_negative),
+                    confidence=statistics.mean(
+                        abs(f.correlation) for f in top_negative
+                    ),
                     source_patterns=[],
                     source_factors=[f.id for f in top_negative],
                     source_anomalies=[],
-                    recommended_actions=[
-                        f"Mitigate: {f.name}" for f in top_negative
-                    ],
+                    recommended_actions=[f"Mitigate: {f.name}" for f in top_negative],
                     generated_at=_utcnow(),
                     metadata={
                         "factors": [f.to_dict() for f in top_negative],
@@ -1254,8 +1259,7 @@ class MemoryReflection:
         )
 
         failure_rate_anomalies = [
-            a for a in anomalies
-            if a.anomaly_type == AnomalyType.UNUSUAL_FAILURE_RATE
+            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_FAILURE_RATE
         ]
         if failure_rate_anomalies:
             for anomaly in failure_rate_anomalies:
@@ -1295,7 +1299,13 @@ class MemoryReflection:
         total_items = len(patterns) + len(factors) + len(anomalies)
         if total_items > 0:
             warning_count = (
-                len([p for p in patterns if p.pattern_type == PatternType.RECURRING_FAILURE])
+                len(
+                    [
+                        p
+                        for p in patterns
+                        if p.pattern_type == PatternType.RECURRING_FAILURE
+                    ]
+                )
                 + len([a for a in anomalies if a.is_critical])
                 + len([f for f in factors if f.correlation < -0.3])
             )
@@ -1312,13 +1322,19 @@ class MemoryReflection:
                         f"Found {warning_count} warning indicators."
                     ),
                     priority=0.6,
-                    confidence=min(1.0, total_items / 20),  # Higher sample = higher confidence
+                    confidence=min(
+                        1.0, total_items / 20
+                    ),  # Higher sample = higher confidence
                     source_patterns=[p.id for p in patterns[:5]],
                     source_factors=[f.id for f in factors[:5]],
                     source_anomalies=[a.id for a in anomalies[:5]],
                     recommended_actions=(
-                        ["Continue current practices"] if health_score > 0.7
-                        else ["Review warnings and address issues", "Focus on improvement areas"]
+                        ["Continue current practices"]
+                        if health_score > 0.7
+                        else [
+                            "Review warnings and address issues",
+                            "Focus on improvement areas",
+                        ]
                     ),
                     generated_at=_utcnow(),
                     metadata={
@@ -1374,8 +1390,7 @@ class MemoryReflection:
             agent_instance_id=agent_instance_id,
         )
         episodes_in_range = [
-            e for e in episodes
-            if time_range.start <= e.occurred_at <= time_range.end
+            e for e in episodes if time_range.start <= e.occurred_at <= time_range.end
         ]
 
         # Run all analyses

View File

@@ -70,8 +70,7 @@ class TimeRange:
         """Create time range for last N hours."""
         end = _utcnow()
         start = datetime(
-            end.year, end.month, end.day, end.hour, end.minute, end.second,
-            tzinfo=UTC
+            end.year, end.month, end.day, end.hour, end.minute, end.second, tzinfo=UTC
         ) - __import__("datetime").timedelta(hours=hours)
         return cls(start=start, end=end)

View File

@@ -5,8 +5,6 @@ from datetime import UTC, datetime
 from unittest.mock import MagicMock
 from uuid import uuid4
 
-import pytest
-
 from app.services.context.types import ContextType
 from app.services.context.types.memory import MemoryContext, MemorySubtype

View File

@@ -133,9 +133,7 @@ class TestMemoryContextSource:
         )
 
         assert result.by_type["working"] == 2
-        assert all(
-            c.memory_subtype == MemorySubtype.WORKING for c in result.contexts
-        )
+        assert all(c.memory_subtype == MemorySubtype.WORKING for c in result.contexts)
 
     @patch("app.services.memory.integration.context_source.EpisodicMemory")
     async def test_fetch_episodic_memory(
@@ -252,11 +250,10 @@ class TestMemoryContextSource:
         context_source: MemoryContextSource,
     ) -> None:
         """Results should be sorted by relevance score."""
-        with patch.object(
-            context_source, "_fetch_episodic"
-        ) as mock_ep, patch.object(
-            context_source, "_fetch_semantic"
-        ) as mock_sem:
+        with (
+            patch.object(context_source, "_fetch_episodic") as mock_ep,
+            patch.object(context_source, "_fetch_semantic") as mock_sem,
+        ):
             # Create contexts with different relevance scores
             from app.services.context.types.memory import MemoryContext

View File

@@ -105,6 +105,7 @@ class TestLifecycleHooks:
     def test_register_spawn_hook(self, lifecycle_hooks: LifecycleHooks) -> None:
         """Should register spawn hook."""
+
         async def my_hook(event: LifecycleEvent) -> None:
             pass
@@ -115,7 +116,7 @@ class TestLifecycleHooks:
     def test_register_all_hooks(self, lifecycle_hooks: LifecycleHooks) -> None:
         """Should register hooks for all event types."""
-        hooks = [
+        [
             lifecycle_hooks.on_spawn(AsyncMock()),
             lifecycle_hooks.on_pause(AsyncMock()),
             lifecycle_hooks.on_resume(AsyncMock()),

View File

@@ -2,7 +2,6 @@
 """Tests for MemoryToolService."""
 
 from datetime import UTC, datetime
-from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 from uuid import UUID, uuid4
@@ -14,11 +13,6 @@ from app.services.memory.mcp.service import (
     ToolResult,
     get_memory_tool_service,
 )
-from app.services.memory.mcp.tools import (
-    AnalysisType,
-    MemoryType,
-    OutcomeType,
-)
 from app.services.memory.types import Outcome
 
 pytestmark = pytest.mark.asyncio(loop_scope="function")
@@ -192,7 +186,9 @@ class TestMemoryToolService:
         context: ToolContext,
     ) -> None:
         """Remember should store in episodic memory."""
-        with patch("app.services.memory.mcp.service.EpisodicMemory") as mock_episodic_cls:
+        with patch(
+            "app.services.memory.mcp.service.EpisodicMemory"
+        ) as mock_episodic_cls:
             # Setup mock
             mock_episode = MagicMock()
             mock_episode.id = uuid4()
@@ -260,7 +256,9 @@ class TestMemoryToolService:
         context: ToolContext,
     ) -> None:
         """Remember should store facts in semantic memory."""
-        with patch("app.services.memory.mcp.service.SemanticMemory") as mock_semantic_cls:
+        with patch(
+            "app.services.memory.mcp.service.SemanticMemory"
+        ) as mock_semantic_cls:
             mock_fact = MagicMock()
             mock_fact.id = uuid4()
@@ -311,7 +309,9 @@ class TestMemoryToolService:
         context: ToolContext,
     ) -> None:
         """Remember should store procedures in procedural memory."""
-        with patch("app.services.memory.mcp.service.ProceduralMemory") as mock_procedural_cls:
+        with patch(
+            "app.services.memory.mcp.service.ProceduralMemory"
+        ) as mock_procedural_cls:
             mock_procedure = MagicMock()
             mock_procedure.id = uuid4()
@@ -530,15 +530,21 @@ class TestMemoryToolService:
             mock_working_cls.for_session = AsyncMock(return_value=mock_working)
 
             mock_episodic = AsyncMock()
-            mock_episodic.get_recent = AsyncMock(return_value=[MagicMock() for _ in range(10)])
+            mock_episodic.get_recent = AsyncMock(
+                return_value=[MagicMock() for _ in range(10)]
+            )
             mock_episodic_cls.create = AsyncMock(return_value=mock_episodic)
 
             mock_semantic = AsyncMock()
-            mock_semantic.search_facts = AsyncMock(return_value=[MagicMock() for _ in range(5)])
+            mock_semantic.search_facts = AsyncMock(
+                return_value=[MagicMock() for _ in range(5)]
+            )
             mock_semantic_cls.create = AsyncMock(return_value=mock_semantic)
 
             mock_procedural = AsyncMock()
-            mock_procedural.find_matching = AsyncMock(return_value=[MagicMock() for _ in range(3)])
+            mock_procedural.find_matching = AsyncMock(
+                return_value=[MagicMock() for _ in range(3)]
+            )
             mock_procedural_cls.create = AsyncMock(return_value=mock_procedural)
 
             result = await service.execute_tool(
@@ -603,8 +609,12 @@ class TestMemoryToolService:
     ) -> None:
         """Record outcome should store outcome and update procedure."""
         with (
-            patch("app.services.memory.mcp.service.EpisodicMemory") as mock_episodic_cls,
-            patch("app.services.memory.mcp.service.ProceduralMemory") as mock_procedural_cls,
+            patch(
+                "app.services.memory.mcp.service.EpisodicMemory"
+            ) as mock_episodic_cls,
+            patch(
+                "app.services.memory.mcp.service.ProceduralMemory"
+            ) as mock_procedural_cls,
         ):
             mock_episode = MagicMock()
             mock_episode.id = uuid4()

View File

@@ -358,10 +358,12 @@ class TestMemoryToolDefinition:
         )
 
         # Valid args
-        validated = tool.validate_args({
-            "memory_type": "working",
-            "content": "Test content",
-        })
+        validated = tool.validate_args(
+            {
+                "memory_type": "working",
+                "content": "Test content",
+            }
+        )
         assert isinstance(validated, RememberArgs)
 
         # Invalid args
@@ -417,4 +419,6 @@ class TestToolDefinitions:
         """All tool schemas should have properties defined."""
         for name, tool in MEMORY_TOOL_DEFINITIONS.items():
             schema = tool.to_mcp_format()
-            assert "properties" in schema["inputSchema"], f"Tool {name} missing properties"
+            assert "properties" in schema["inputSchema"], (
+                f"Tool {name} missing properties"
+            )

View File

@@ -134,9 +134,7 @@ class TestMemoryMetrics:
         await metrics.set_memory_items_count("semantic", "project", 50)
 
         all_metrics = await metrics.get_all_metrics()
-        gauge_metrics = [
-            m for m in all_metrics if m.name == "memory_items_count"
-        ]
+        gauge_metrics = [m for m in all_metrics if m.name == "memory_items_count"]
         assert len(gauge_metrics) == 2
@@ -181,7 +179,11 @@ class TestMemoryMetrics:
         all_metrics = await metrics.get_all_metrics()
         count_metric = next(
-            (m for m in all_metrics if m.name == "memory_working_latency_seconds_count"),
+            (
+                m
+                for m in all_metrics
+                if m.name == "memory_working_latency_seconds_count"
+            ),
             None,
         )
         sum_metric = next(
@@ -204,9 +206,7 @@ class TestMemoryMetrics:
         assert summary["avg_retrieval_latency_ms"] == pytest.approx(62.5, rel=0.01)
 
     @pytest.mark.asyncio
-    async def test_observe_consolidation_duration(
-        self, metrics: MemoryMetrics
-    ) -> None:
+    async def test_observe_consolidation_duration(self, metrics: MemoryMetrics) -> None:
         """Should record consolidation duration histogram."""
         await metrics.observe_consolidation_duration(5.0)
         await metrics.observe_consolidation_duration(10.0)
@@ -236,7 +236,9 @@ class TestMemoryMetrics:
         assert len(all_metrics) >= 3
 
         # Check we have different metric types
-        counter_metrics = [m for m in all_metrics if m.metric_type == MetricType.COUNTER]
+        counter_metrics = [
+            m for m in all_metrics if m.metric_type == MetricType.COUNTER
+        ]
         gauge_metrics = [m for m in all_metrics if m.metric_type == MetricType.GAUGE]
 
         assert len(counter_metrics) >= 1

View File

@@ -153,8 +153,7 @@ class TestPatternDetection:
         # Should find recurring success pattern for 'build' task
         success_patterns = [
-            p for p in patterns
-            if p.pattern_type == PatternType.RECURRING_SUCCESS
+            p for p in patterns if p.pattern_type == PatternType.RECURRING_SUCCESS
         ]
         assert len(success_patterns) >= 1
         assert any(p.name.find("build") >= 0 for p in success_patterns)
@@ -193,8 +192,7 @@ class TestPatternDetection:
         patterns = await reflection.analyze_patterns(project_id, time_range)
 
         failure_patterns = [
-            p for p in patterns
-            if p.pattern_type == PatternType.RECURRING_FAILURE
+            p for p in patterns if p.pattern_type == PatternType.RECURRING_FAILURE
         ]
         assert len(failure_patterns) >= 1
@@ -229,8 +227,7 @@ class TestPatternDetection:
         patterns = await reflection.analyze_patterns(project_id, time_range)
 
         action_patterns = [
-            p for p in patterns
-            if p.pattern_type == PatternType.ACTION_SEQUENCE
+            p for p in patterns if p.pattern_type == PatternType.ACTION_SEQUENCE
         ]
         assert len(action_patterns) >= 1
@@ -438,8 +435,7 @@ class TestAnomalyDetection:
         anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
 
         duration_anomalies = [
-            a for a in anomalies
-            if a.anomaly_type == AnomalyType.UNUSUAL_DURATION
+            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_DURATION
         ]
         assert len(duration_anomalies) >= 1
@@ -475,8 +471,7 @@ class TestAnomalyDetection:
         anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
 
         outcome_anomalies = [
-            a for a in anomalies
-            if a.anomaly_type == AnomalyType.UNEXPECTED_OUTCOME
+            a for a in anomalies if a.anomaly_type == AnomalyType.UNEXPECTED_OUTCOME
         ]
         assert len(outcome_anomalies) >= 1
@@ -510,8 +505,7 @@ class TestAnomalyDetection:
         anomalies = await reflection.detect_anomalies(project_id, baseline_days=30)
 
         token_anomalies = [
-            a for a in anomalies
-            if a.anomaly_type == AnomalyType.UNUSUAL_TOKEN_USAGE
+            a for a in anomalies if a.anomaly_type == AnomalyType.UNUSUAL_TOKEN_USAGE
         ]
         assert len(token_anomalies) >= 1
@@ -650,9 +644,7 @@ class TestInsightGeneration:
         insights = await reflection.generate_insights(project_id)
 
-        trend_insights = [
-            i for i in insights if i.insight_type == InsightType.TREND
-        ]
+        trend_insights = [i for i in insights if i.insight_type == InsightType.TREND]
         assert len(trend_insights) >= 1
 
     async def test_insights_sorted_by_priority(
@@ -662,10 +654,7 @@ class TestInsightGeneration:
         """Should sort insights by priority."""
         project_id = uuid4()
-        episodes = [
-            create_mock_episode(outcome=Outcome.SUCCESS)
-            for _ in range(10)
-        ]
+        episodes = [create_mock_episode(outcome=Outcome.SUCCESS) for _ in range(10)]
 
         mock_episodic = MagicMock()
         mock_episodic.get_recent = AsyncMock(return_value=episodes)