forked from cardosofelipe/fast-next-template
feat(memory): implement metrics and observability (#100)
Add comprehensive metrics collector for memory system with:

- Counter metrics: operations, retrievals, cache hits/misses, consolidations, episodes recorded, patterns/anomalies/insights detected
- Gauge metrics: item counts, memory size, cache size, procedure success rates, active sessions, pending consolidations
- Histogram metrics: working memory latency, retrieval latency, consolidation duration, embedding latency
- Prometheus format export
- Summary and cache stats helpers

31 tests covering all metric types, singleton pattern, and edge cases.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
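For orientation, a minimal sketch of how the new convenience helpers are meant to be called from memory-service code; the wrapper function and the retrieval stand-in are illustrative, not part of this commit:

import time

from app.services.memory.metrics import record_memory_operation, record_retrieval


async def example_retrieval_path() -> None:
    # Hypothetical call site: time a retrieval and report it to the collector.
    start = time.perf_counter()
    results = ["episode-1", "episode-2"]  # stand-in for a real retrieval result
    latency_ms = (time.perf_counter() - start) * 1000

    await record_retrieval(
        memory_type="episodic",
        strategy="similarity",
        results_count=len(results),
        latency_ms=latency_ms,
    )

    # Individual get/set/delete operations are reported the same way; for
    # working memory the latency also feeds the working-latency histogram.
    await record_memory_operation(
        operation="get",
        memory_type="working",
        scope="session",
        success=True,
        latency_ms=latency_ms,
    )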
18
backend/app/services/memory/metrics/__init__.py
Normal file
@@ -0,0 +1,18 @@
# app/services/memory/metrics/__init__.py
"""Memory Metrics module."""

from .collector import (
    MemoryMetrics,
    get_memory_metrics,
    record_memory_operation,
    record_retrieval,
    reset_memory_metrics,
)

__all__ = [
    "MemoryMetrics",
    "get_memory_metrics",
    "record_memory_operation",
    "record_retrieval",
    "reset_memory_metrics",
]
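The package root above re-exports the public surface, so call sites can import from app.services.memory.metrics directly rather than reaching into collector. A small sketch; the calling module is purely hypothetical:

# e.g. in a hypothetical app/services/memory/working.py
from app.services.memory.metrics import get_memory_metrics


async def report_session_count(count: int) -> None:
    # Push the current number of active working-memory sessions as a gauge.
    metrics = await get_memory_metrics()
    await metrics.set_active_sessions(count)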
539
backend/app/services/memory/metrics/collector.py
Normal file
@@ -0,0 +1,539 @@
# app/services/memory/metrics/collector.py
"""
Memory Metrics Collector

Collects and exposes metrics for the memory system.
"""

import asyncio
import logging
from collections import Counter, defaultdict
from dataclasses import dataclass, field
from datetime import UTC, datetime
from enum import Enum
from typing import Any

logger = logging.getLogger(__name__)


class MetricType(str, Enum):
    """Types of metrics."""

    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"


@dataclass
class MetricValue:
    """A single metric value."""

    name: str
    metric_type: MetricType
    value: float
    labels: dict[str, str] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))


@dataclass
class HistogramBucket:
    """Histogram bucket for distribution metrics."""

    le: float  # Less than or equal
    count: int = 0


class MemoryMetrics:
    """
    Collects memory system metrics.

    Metrics tracked:
    - Memory operations (get/set/delete by type and scope)
    - Retrieval operations and latencies
    - Memory item counts by type
    - Consolidation operations and durations
    - Cache hit/miss rates
    - Procedure success rates
    - Embedding operations
    """

    def __init__(self) -> None:
        """Initialize MemoryMetrics."""
        self._counters: dict[str, Counter[str]] = defaultdict(Counter)
        self._gauges: dict[str, dict[str, float]] = defaultdict(dict)
        self._histograms: dict[str, list[float]] = defaultdict(list)
        self._histogram_buckets: dict[str, list[HistogramBucket]] = {}
        self._lock = asyncio.Lock()

        # Initialize histogram buckets
        self._init_histogram_buckets()

    def _init_histogram_buckets(self) -> None:
        """Initialize histogram buckets for latency metrics."""
        # Fast operations (working memory)
        fast_buckets = [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, float("inf")]

        # Normal operations (retrieval)
        normal_buckets = [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, float("inf")]

        # Slow operations (consolidation)
        slow_buckets = [0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, float("inf")]

        self._histogram_buckets["memory_working_latency_seconds"] = [
            HistogramBucket(le=b) for b in fast_buckets
        ]
        self._histogram_buckets["memory_retrieval_latency_seconds"] = [
            HistogramBucket(le=b) for b in normal_buckets
        ]
        self._histogram_buckets["memory_consolidation_duration_seconds"] = [
            HistogramBucket(le=b) for b in slow_buckets
        ]
        self._histogram_buckets["memory_embedding_latency_seconds"] = [
            HistogramBucket(le=b) for b in normal_buckets
        ]

    # Counter methods - Operations

    async def inc_operations(
        self,
        operation: str,
        memory_type: str,
        scope: str | None = None,
        success: bool = True,
    ) -> None:
        """Increment memory operation counter."""
        async with self._lock:
            labels = f"operation={operation},memory_type={memory_type}"
            if scope:
                labels += f",scope={scope}"
            labels += f",success={str(success).lower()}"
            self._counters["memory_operations_total"][labels] += 1

    async def inc_retrieval(
        self,
        memory_type: str,
        strategy: str,
        results_count: int,
    ) -> None:
        """Increment retrieval counter."""
        async with self._lock:
            labels = f"memory_type={memory_type},strategy={strategy}"
            self._counters["memory_retrievals_total"][labels] += 1

            # Track result counts as a separate metric
            self._counters["memory_retrieval_results_total"][labels] += results_count

    async def inc_cache_hit(self, cache_type: str) -> None:
        """Increment cache hit counter."""
        async with self._lock:
            labels = f"cache_type={cache_type}"
            self._counters["memory_cache_hits_total"][labels] += 1

    async def inc_cache_miss(self, cache_type: str) -> None:
        """Increment cache miss counter."""
        async with self._lock:
            labels = f"cache_type={cache_type}"
            self._counters["memory_cache_misses_total"][labels] += 1

    async def inc_consolidation(
        self,
        consolidation_type: str,
        success: bool = True,
    ) -> None:
        """Increment consolidation counter."""
        async with self._lock:
            labels = f"type={consolidation_type},success={str(success).lower()}"
            self._counters["memory_consolidations_total"][labels] += 1

    async def inc_procedure_execution(
        self,
        procedure_id: str | None = None,
        success: bool = True,
    ) -> None:
        """Increment procedure execution counter."""
        async with self._lock:
            labels = f"success={str(success).lower()}"
            self._counters["memory_procedure_executions_total"][labels] += 1

    async def inc_embeddings_generated(self, memory_type: str) -> None:
        """Increment embeddings generated counter."""
        async with self._lock:
            labels = f"memory_type={memory_type}"
            self._counters["memory_embeddings_generated_total"][labels] += 1

    async def inc_fact_reinforcements(self) -> None:
        """Increment fact reinforcement counter."""
        async with self._lock:
            self._counters["memory_fact_reinforcements_total"][""] += 1

    async def inc_episodes_recorded(self, outcome: str) -> None:
        """Increment episodes recorded counter."""
        async with self._lock:
            labels = f"outcome={outcome}"
            self._counters["memory_episodes_recorded_total"][labels] += 1

    async def inc_anomalies_detected(self, anomaly_type: str) -> None:
        """Increment anomaly detection counter."""
        async with self._lock:
            labels = f"anomaly_type={anomaly_type}"
            self._counters["memory_anomalies_detected_total"][labels] += 1

    async def inc_patterns_detected(self, pattern_type: str) -> None:
        """Increment pattern detection counter."""
        async with self._lock:
            labels = f"pattern_type={pattern_type}"
            self._counters["memory_patterns_detected_total"][labels] += 1

    async def inc_insights_generated(self, insight_type: str) -> None:
        """Increment insight generation counter."""
        async with self._lock:
            labels = f"insight_type={insight_type}"
            self._counters["memory_insights_generated_total"][labels] += 1

    # Gauge methods

    async def set_memory_items_count(
        self,
        memory_type: str,
        scope: str,
        count: int,
    ) -> None:
        """Set memory item count gauge."""
        async with self._lock:
            labels = f"memory_type={memory_type},scope={scope}"
            self._gauges["memory_items_count"][labels] = float(count)

    async def set_memory_size_bytes(
        self,
        memory_type: str,
        scope: str,
        size_bytes: int,
    ) -> None:
        """Set memory size gauge in bytes."""
        async with self._lock:
            labels = f"memory_type={memory_type},scope={scope}"
            self._gauges["memory_size_bytes"][labels] = float(size_bytes)

    async def set_cache_size(self, cache_type: str, size: int) -> None:
        """Set cache size gauge."""
        async with self._lock:
            labels = f"cache_type={cache_type}"
            self._gauges["memory_cache_size"][labels] = float(size)

    async def set_procedure_success_rate(
        self,
        procedure_name: str,
        rate: float,
    ) -> None:
        """Set procedure success rate gauge (0-1)."""
        async with self._lock:
            labels = f"procedure_name={procedure_name}"
            self._gauges["memory_procedure_success_rate"][labels] = rate

    async def set_active_sessions(self, count: int) -> None:
        """Set active working memory sessions gauge."""
        async with self._lock:
            self._gauges["memory_active_sessions"][""] = float(count)

    async def set_pending_consolidations(self, count: int) -> None:
        """Set pending consolidations gauge."""
        async with self._lock:
            self._gauges["memory_pending_consolidations"][""] = float(count)

    # Histogram methods

    async def observe_working_latency(self, latency_seconds: float) -> None:
        """Observe working memory operation latency."""
        async with self._lock:
            self._observe_histogram("memory_working_latency_seconds", latency_seconds)

    async def observe_retrieval_latency(self, latency_seconds: float) -> None:
        """Observe retrieval latency."""
        async with self._lock:
            self._observe_histogram("memory_retrieval_latency_seconds", latency_seconds)

    async def observe_consolidation_duration(self, duration_seconds: float) -> None:
        """Observe consolidation duration."""
        async with self._lock:
            self._observe_histogram(
                "memory_consolidation_duration_seconds", duration_seconds
            )

    async def observe_embedding_latency(self, latency_seconds: float) -> None:
        """Observe embedding generation latency."""
        async with self._lock:
            self._observe_histogram("memory_embedding_latency_seconds", latency_seconds)

    def _observe_histogram(self, name: str, value: float) -> None:
        """Record a value in a histogram."""
        self._histograms[name].append(value)

        # Update buckets
        if name in self._histogram_buckets:
            for bucket in self._histogram_buckets[name]:
                if value <= bucket.le:
                    bucket.count += 1

    # Export methods

    async def get_all_metrics(self) -> list[MetricValue]:
        """Get all metrics as MetricValue objects."""
        metrics: list[MetricValue] = []

        async with self._lock:
            # Export counters
            for name, counter in self._counters.items():
                for labels_str, value in counter.items():
                    labels = self._parse_labels(labels_str)
                    metrics.append(
                        MetricValue(
                            name=name,
                            metric_type=MetricType.COUNTER,
                            value=float(value),
                            labels=labels,
                        )
                    )

            # Export gauges
            for name, gauge_dict in self._gauges.items():
                for labels_str, gauge_value in gauge_dict.items():
                    gauge_labels = self._parse_labels(labels_str)
                    metrics.append(
                        MetricValue(
                            name=name,
                            metric_type=MetricType.GAUGE,
                            value=gauge_value,
                            labels=gauge_labels,
                        )
                    )

            # Export histogram summaries
            for name, values in self._histograms.items():
                if values:
                    metrics.append(
                        MetricValue(
                            name=f"{name}_count",
                            metric_type=MetricType.COUNTER,
                            value=float(len(values)),
                        )
                    )
                    metrics.append(
                        MetricValue(
                            name=f"{name}_sum",
                            metric_type=MetricType.COUNTER,
                            value=sum(values),
                        )
                    )

        return metrics

    async def get_prometheus_format(self) -> str:
        """Export metrics in Prometheus text format."""
        lines: list[str] = []

        async with self._lock:
            # Export counters
            for name, counter in self._counters.items():
                lines.append(f"# TYPE {name} counter")
                for labels_str, value in counter.items():
                    if labels_str:
                        lines.append(f"{name}{{{labels_str}}} {value}")
                    else:
                        lines.append(f"{name} {value}")

            # Export gauges
            for name, gauge_dict in self._gauges.items():
                lines.append(f"# TYPE {name} gauge")
                for labels_str, gauge_value in gauge_dict.items():
                    if labels_str:
                        lines.append(f"{name}{{{labels_str}}} {gauge_value}")
                    else:
                        lines.append(f"{name} {gauge_value}")

            # Export histograms
            for name, buckets in self._histogram_buckets.items():
                lines.append(f"# TYPE {name} histogram")
                for bucket in buckets:
                    le_str = "+Inf" if bucket.le == float("inf") else str(bucket.le)
                    lines.append(f'{name}_bucket{{le="{le_str}"}} {bucket.count}')

                if name in self._histograms:
                    values = self._histograms[name]
                    lines.append(f"{name}_count {len(values)}")
                    lines.append(f"{name}_sum {sum(values)}")

        return "\n".join(lines)

    async def get_summary(self) -> dict[str, Any]:
        """Get a summary of key metrics."""
        async with self._lock:
            total_operations = sum(self._counters["memory_operations_total"].values())
            successful_operations = sum(
                v
                for k, v in self._counters["memory_operations_total"].items()
                if "success=true" in k
            )

            total_retrievals = sum(self._counters["memory_retrievals_total"].values())

            total_cache_hits = sum(self._counters["memory_cache_hits_total"].values())
            total_cache_misses = sum(
                self._counters["memory_cache_misses_total"].values()
            )
            cache_hit_rate = (
                total_cache_hits / (total_cache_hits + total_cache_misses)
                if (total_cache_hits + total_cache_misses) > 0
                else 0.0
            )

            total_consolidations = sum(
                self._counters["memory_consolidations_total"].values()
            )

            total_episodes = sum(
                self._counters["memory_episodes_recorded_total"].values()
            )

            # Calculate average latencies
            retrieval_latencies = self._histograms.get(
                "memory_retrieval_latency_seconds", []
            )
            avg_retrieval_latency = (
                sum(retrieval_latencies) / len(retrieval_latencies)
                if retrieval_latencies
                else 0.0
            )

            return {
                "total_operations": total_operations,
                "successful_operations": successful_operations,
                "operation_success_rate": (
                    successful_operations / total_operations
                    if total_operations > 0
                    else 1.0
                ),
                "total_retrievals": total_retrievals,
                "cache_hit_rate": cache_hit_rate,
                "total_consolidations": total_consolidations,
                "total_episodes_recorded": total_episodes,
                "avg_retrieval_latency_ms": avg_retrieval_latency * 1000,
                "patterns_detected": sum(
                    self._counters["memory_patterns_detected_total"].values()
                ),
                "insights_generated": sum(
                    self._counters["memory_insights_generated_total"].values()
                ),
                "anomalies_detected": sum(
                    self._counters["memory_anomalies_detected_total"].values()
                ),
                "active_sessions": self._gauges.get("memory_active_sessions", {}).get(
                    "", 0
                ),
                "pending_consolidations": self._gauges.get(
                    "memory_pending_consolidations", {}
                ).get("", 0),
            }

    async def get_cache_stats(self) -> dict[str, Any]:
        """Get detailed cache statistics."""
        async with self._lock:
            stats: dict[str, Any] = {}

            # Get hits/misses by cache type
            for labels_str, hits in self._counters["memory_cache_hits_total"].items():
                cache_type = self._parse_labels(labels_str).get(
                    "cache_type", "unknown"
                )
                if cache_type not in stats:
                    stats[cache_type] = {"hits": 0, "misses": 0}
                stats[cache_type]["hits"] = hits

            for labels_str, misses in self._counters[
                "memory_cache_misses_total"
            ].items():
                cache_type = self._parse_labels(labels_str).get(
                    "cache_type", "unknown"
                )
                if cache_type not in stats:
                    stats[cache_type] = {"hits": 0, "misses": 0}
                stats[cache_type]["misses"] = misses

            # Calculate hit rates
            for data in stats.values():
                total = data["hits"] + data["misses"]
                data["hit_rate"] = data["hits"] / total if total > 0 else 0.0
                data["total"] = total

            return stats

    async def reset(self) -> None:
        """Reset all metrics."""
        async with self._lock:
            self._counters.clear()
            self._gauges.clear()
            self._histograms.clear()
            self._init_histogram_buckets()

    def _parse_labels(self, labels_str: str) -> dict[str, str]:
        """Parse labels string into dictionary."""
        if not labels_str:
            return {}

        labels = {}
        for pair in labels_str.split(","):
            if "=" in pair:
                key, value = pair.split("=", 1)
                labels[key.strip()] = value.strip()

        return labels


# Singleton instance
_metrics: MemoryMetrics | None = None
_lock = asyncio.Lock()


async def get_memory_metrics() -> MemoryMetrics:
    """Get the singleton MemoryMetrics instance."""
    global _metrics

    async with _lock:
        if _metrics is None:
            _metrics = MemoryMetrics()
        return _metrics


def reset_memory_metrics() -> None:
    """Reset the singleton instance (for testing)."""
    global _metrics
    _metrics = None


# Convenience functions


async def record_memory_operation(
    operation: str,
    memory_type: str,
    scope: str | None = None,
    success: bool = True,
    latency_ms: float | None = None,
) -> None:
    """Record a memory operation."""
    metrics = await get_memory_metrics()
    await metrics.inc_operations(operation, memory_type, scope, success)

    if latency_ms is not None and memory_type == "working":
        await metrics.observe_working_latency(latency_ms / 1000)


async def record_retrieval(
    memory_type: str,
    strategy: str,
    results_count: int,
    latency_ms: float,
) -> None:
    """Record a retrieval operation."""
    metrics = await get_memory_metrics()
    await metrics.inc_retrieval(memory_type, strategy, results_count)
    await metrics.observe_retrieval_latency(latency_ms / 1000)
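A natural consumer of get_prometheus_format() is an HTTP scrape endpoint. A sketch of such wiring, assuming the backend's FastAPI stack; the router, path, and registration point are assumptions rather than part of this diff:

from fastapi import APIRouter
from fastapi.responses import PlainTextResponse

from app.services.memory.metrics import get_memory_metrics

router = APIRouter()


@router.get("/metrics/memory", response_class=PlainTextResponse)
async def memory_metrics() -> str:
    # Prometheus scrapes plain text; the collector already renders the
    # counter/gauge/histogram lines, so the route just returns them.
    metrics = await get_memory_metrics()
    return await metrics.get_prometheus_format()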
2
backend/tests/unit/services/memory/metrics/__init__.py
Normal file
@@ -0,0 +1,2 @@
# tests/unit/services/memory/metrics/__init__.py
"""Tests for Memory Metrics."""
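The bucket assertions in test_collector.py below depend on the cumulative bucket semantics implemented by _observe_histogram: one observation increments every bucket whose upper bound le is greater than or equal to the observed value. A tiny standalone illustration of that arithmetic (not importing the collector):

buckets = [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, float("inf")]
counts = {le: 0 for le in buckets}

for observation in (0.005, 0.030, 2.000):
    for le in buckets:
        if observation <= le:
            counts[le] += 1

# counts[0.01] == 1 (only 0.005), counts[0.05] == 2, counts[float("inf")] == 3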
470
backend/tests/unit/services/memory/metrics/test_collector.py
Normal file
@@ -0,0 +1,470 @@
# tests/unit/services/memory/metrics/test_collector.py
"""Tests for Memory Metrics Collector."""

import pytest

from app.services.memory.metrics.collector import (
    MemoryMetrics,
    MetricType,
    MetricValue,
    get_memory_metrics,
    record_memory_operation,
    record_retrieval,
    reset_memory_metrics,
)


@pytest.fixture
def metrics() -> MemoryMetrics:
    """Create a fresh metrics instance for each test."""
    return MemoryMetrics()


@pytest.fixture(autouse=True)
def reset_singleton() -> None:
    """Reset singleton before each test."""
    reset_memory_metrics()


class TestMemoryMetrics:
    """Tests for MemoryMetrics class."""

    @pytest.mark.asyncio
    async def test_inc_operations(self, metrics: MemoryMetrics) -> None:
        """Should increment operation counters."""
        await metrics.inc_operations("get", "working", "session", True)
        await metrics.inc_operations("get", "working", "session", True)
        await metrics.inc_operations("set", "working", "session", True)

        summary = await metrics.get_summary()
        assert summary["total_operations"] == 3
        assert summary["successful_operations"] == 3

    @pytest.mark.asyncio
    async def test_inc_operations_failure(self, metrics: MemoryMetrics) -> None:
        """Should track failed operations."""
        await metrics.inc_operations("get", "working", None, True)
        await metrics.inc_operations("get", "working", None, False)

        summary = await metrics.get_summary()
        assert summary["total_operations"] == 2
        assert summary["successful_operations"] == 1
        assert summary["operation_success_rate"] == 0.5

    @pytest.mark.asyncio
    async def test_inc_retrieval(self, metrics: MemoryMetrics) -> None:
        """Should increment retrieval counters."""
        await metrics.inc_retrieval("episodic", "similarity", 5)
        await metrics.inc_retrieval("episodic", "temporal", 3)
        await metrics.inc_retrieval("semantic", "similarity", 10)

        summary = await metrics.get_summary()
        assert summary["total_retrievals"] == 3

    @pytest.mark.asyncio
    async def test_cache_hit_miss(self, metrics: MemoryMetrics) -> None:
        """Should track cache hits and misses."""
        await metrics.inc_cache_hit("hot")
        await metrics.inc_cache_hit("hot")
        await metrics.inc_cache_hit("hot")
        await metrics.inc_cache_miss("hot")

        summary = await metrics.get_summary()
        assert summary["cache_hit_rate"] == 0.75

    @pytest.mark.asyncio
    async def test_cache_stats(self, metrics: MemoryMetrics) -> None:
        """Should provide detailed cache stats."""
        await metrics.inc_cache_hit("hot")
        await metrics.inc_cache_hit("hot")
        await metrics.inc_cache_miss("hot")
        await metrics.inc_cache_hit("embedding")
        await metrics.inc_cache_miss("embedding")
        await metrics.inc_cache_miss("embedding")

        stats = await metrics.get_cache_stats()

        assert stats["hot"]["hits"] == 2
        assert stats["hot"]["misses"] == 1
        assert stats["hot"]["hit_rate"] == pytest.approx(0.6667, rel=0.01)

        assert stats["embedding"]["hits"] == 1
        assert stats["embedding"]["misses"] == 2
        assert stats["embedding"]["hit_rate"] == pytest.approx(0.3333, rel=0.01)

    @pytest.mark.asyncio
    async def test_inc_consolidation(self, metrics: MemoryMetrics) -> None:
        """Should increment consolidation counter."""
        await metrics.inc_consolidation("working_to_episodic", True)
        await metrics.inc_consolidation("episodic_to_semantic", True)
        await metrics.inc_consolidation("prune", False)

        summary = await metrics.get_summary()
        assert summary["total_consolidations"] == 3

    @pytest.mark.asyncio
    async def test_inc_episodes_recorded(self, metrics: MemoryMetrics) -> None:
        """Should track episodes by outcome."""
        await metrics.inc_episodes_recorded("success")
        await metrics.inc_episodes_recorded("success")
        await metrics.inc_episodes_recorded("failure")

        summary = await metrics.get_summary()
        assert summary["total_episodes_recorded"] == 3

    @pytest.mark.asyncio
    async def test_inc_patterns_insights_anomalies(
        self, metrics: MemoryMetrics
    ) -> None:
        """Should track reflection metrics."""
        await metrics.inc_patterns_detected("recurring_success")
        await metrics.inc_patterns_detected("action_sequence")
        await metrics.inc_insights_generated("optimization")
        await metrics.inc_anomalies_detected("unusual_duration")

        summary = await metrics.get_summary()
        assert summary["patterns_detected"] == 2
        assert summary["insights_generated"] == 1
        assert summary["anomalies_detected"] == 1

    @pytest.mark.asyncio
    async def test_set_memory_items_count(self, metrics: MemoryMetrics) -> None:
        """Should set memory item count gauge."""
        await metrics.set_memory_items_count("episodic", "project", 100)
        await metrics.set_memory_items_count("semantic", "project", 50)

        all_metrics = await metrics.get_all_metrics()
        gauge_metrics = [
            m for m in all_metrics if m.name == "memory_items_count"
        ]

        assert len(gauge_metrics) == 2

    @pytest.mark.asyncio
    async def test_set_memory_size_bytes(self, metrics: MemoryMetrics) -> None:
        """Should set memory size gauge."""
        await metrics.set_memory_size_bytes("working", "session", 1024)

        all_metrics = await metrics.get_all_metrics()
        size_metrics = [m for m in all_metrics if m.name == "memory_size_bytes"]

        assert len(size_metrics) == 1
        assert size_metrics[0].value == 1024.0

    @pytest.mark.asyncio
    async def test_set_procedure_success_rate(self, metrics: MemoryMetrics) -> None:
        """Should set procedure success rate gauge."""
        await metrics.set_procedure_success_rate("code_review", 0.85)

        all_metrics = await metrics.get_all_metrics()
        rate_metrics = [
            m for m in all_metrics if m.name == "memory_procedure_success_rate"
        ]

        assert len(rate_metrics) == 1
        assert rate_metrics[0].value == 0.85

    @pytest.mark.asyncio
    async def test_set_active_sessions(self, metrics: MemoryMetrics) -> None:
        """Should set active sessions gauge."""
        await metrics.set_active_sessions(5)

        summary = await metrics.get_summary()
        assert summary["active_sessions"] == 5

    @pytest.mark.asyncio
    async def test_observe_working_latency(self, metrics: MemoryMetrics) -> None:
        """Should record working memory latency histogram."""
        await metrics.observe_working_latency(0.005)  # 5ms
        await metrics.observe_working_latency(0.003)  # 3ms
        await metrics.observe_working_latency(0.010)  # 10ms

        all_metrics = await metrics.get_all_metrics()
        count_metric = next(
            (m for m in all_metrics if m.name == "memory_working_latency_seconds_count"),
            None,
        )
        sum_metric = next(
            (m for m in all_metrics if m.name == "memory_working_latency_seconds_sum"),
            None,
        )

        assert count_metric is not None
        assert count_metric.value == 3
        assert sum_metric is not None
        assert sum_metric.value == pytest.approx(0.018, rel=0.01)

    @pytest.mark.asyncio
    async def test_observe_retrieval_latency(self, metrics: MemoryMetrics) -> None:
        """Should record retrieval latency histogram."""
        await metrics.observe_retrieval_latency(0.050)  # 50ms
        await metrics.observe_retrieval_latency(0.075)  # 75ms

        summary = await metrics.get_summary()
        assert summary["avg_retrieval_latency_ms"] == pytest.approx(62.5, rel=0.01)

    @pytest.mark.asyncio
    async def test_observe_consolidation_duration(
        self, metrics: MemoryMetrics
    ) -> None:
        """Should record consolidation duration histogram."""
        await metrics.observe_consolidation_duration(5.0)
        await metrics.observe_consolidation_duration(10.0)

        all_metrics = await metrics.get_all_metrics()
        count_metric = next(
            (
                m
                for m in all_metrics
                if m.name == "memory_consolidation_duration_seconds_count"
            ),
            None,
        )

        assert count_metric is not None
        assert count_metric.value == 2

    @pytest.mark.asyncio
    async def test_get_all_metrics(self, metrics: MemoryMetrics) -> None:
        """Should return all metrics as MetricValue objects."""
        await metrics.inc_operations("get", "working", None, True)
        await metrics.set_active_sessions(3)
        await metrics.observe_retrieval_latency(0.05)

        all_metrics = await metrics.get_all_metrics()

        assert len(all_metrics) >= 3

        # Check we have different metric types
        counter_metrics = [m for m in all_metrics if m.metric_type == MetricType.COUNTER]
        gauge_metrics = [m for m in all_metrics if m.metric_type == MetricType.GAUGE]

        assert len(counter_metrics) >= 1
        assert len(gauge_metrics) >= 1

    @pytest.mark.asyncio
    async def test_get_prometheus_format(self, metrics: MemoryMetrics) -> None:
        """Should export metrics in Prometheus format."""
        await metrics.inc_operations("get", "working", "session", True)
        await metrics.set_active_sessions(5)

        prometheus_output = await metrics.get_prometheus_format()

        assert "# TYPE memory_operations_total counter" in prometheus_output
        assert "memory_operations_total{" in prometheus_output
        assert "# TYPE memory_active_sessions gauge" in prometheus_output
        assert "memory_active_sessions 5" in prometheus_output

    @pytest.mark.asyncio
    async def test_get_summary(self, metrics: MemoryMetrics) -> None:
        """Should return summary dictionary."""
        await metrics.inc_operations("get", "working", None, True)
        await metrics.inc_retrieval("episodic", "similarity", 5)
        await metrics.inc_cache_hit("hot")
        await metrics.inc_consolidation("prune", True)

        summary = await metrics.get_summary()

        assert "total_operations" in summary
        assert "total_retrievals" in summary
        assert "cache_hit_rate" in summary
        assert "total_consolidations" in summary
        assert "operation_success_rate" in summary

    @pytest.mark.asyncio
    async def test_reset(self, metrics: MemoryMetrics) -> None:
        """Should reset all metrics."""
        await metrics.inc_operations("get", "working", None, True)
        await metrics.set_active_sessions(5)
        await metrics.observe_retrieval_latency(0.05)

        await metrics.reset()

        summary = await metrics.get_summary()
        assert summary["total_operations"] == 0
        assert summary["active_sessions"] == 0


class TestMetricValue:
    """Tests for MetricValue dataclass."""

    def test_creates_metric_value(self) -> None:
        """Should create metric value with defaults."""
        metric = MetricValue(
            name="test_metric",
            metric_type=MetricType.COUNTER,
            value=42.0,
        )

        assert metric.name == "test_metric"
        assert metric.metric_type == MetricType.COUNTER
        assert metric.value == 42.0
        assert metric.labels == {}
        assert metric.timestamp is not None

    def test_creates_metric_value_with_labels(self) -> None:
        """Should create metric value with labels."""
        metric = MetricValue(
            name="test_metric",
            metric_type=MetricType.GAUGE,
            value=100.0,
            labels={"scope": "project", "type": "episodic"},
        )

        assert metric.labels == {"scope": "project", "type": "episodic"}


class TestSingleton:
    """Tests for singleton pattern."""

    @pytest.mark.asyncio
    async def test_get_memory_metrics_singleton(self) -> None:
        """Should return same instance."""
        metrics1 = await get_memory_metrics()
        metrics2 = await get_memory_metrics()

        assert metrics1 is metrics2

    @pytest.mark.asyncio
    async def test_reset_singleton(self) -> None:
        """Should reset singleton instance."""
        metrics1 = await get_memory_metrics()
        await metrics1.inc_operations("get", "working", None, True)

        reset_memory_metrics()

        metrics2 = await get_memory_metrics()
        summary = await metrics2.get_summary()

        assert metrics1 is not metrics2
        assert summary["total_operations"] == 0


class TestConvenienceFunctions:
    """Tests for convenience functions."""

    @pytest.mark.asyncio
    async def test_record_memory_operation(self) -> None:
        """Should record memory operation."""
        await record_memory_operation(
            operation="get",
            memory_type="working",
            scope="session",
            success=True,
            latency_ms=5.0,
        )

        metrics = await get_memory_metrics()
        summary = await metrics.get_summary()

        assert summary["total_operations"] == 1

    @pytest.mark.asyncio
    async def test_record_retrieval(self) -> None:
        """Should record retrieval operation."""
        await record_retrieval(
            memory_type="episodic",
            strategy="similarity",
            results_count=10,
            latency_ms=50.0,
        )

        metrics = await get_memory_metrics()
        summary = await metrics.get_summary()

        assert summary["total_retrievals"] == 1
        assert summary["avg_retrieval_latency_ms"] == pytest.approx(50.0, rel=0.01)


class TestHistogramBuckets:
    """Tests for histogram bucket behavior."""

    @pytest.mark.asyncio
    async def test_histogram_buckets_populated(self, metrics: MemoryMetrics) -> None:
        """Should populate histogram buckets correctly."""
        # Add values to different buckets
        await metrics.observe_retrieval_latency(0.005)  # <= 0.01
        await metrics.observe_retrieval_latency(0.030)  # <= 0.05
        await metrics.observe_retrieval_latency(0.080)  # <= 0.1
        await metrics.observe_retrieval_latency(0.500)  # <= 0.5
        await metrics.observe_retrieval_latency(2.000)  # <= 2.5

        prometheus_output = await metrics.get_prometheus_format()

        # Check that histogram buckets are in output
        assert "memory_retrieval_latency_seconds_bucket" in prometheus_output
        assert 'le="0.01"' in prometheus_output
        assert 'le="+Inf"' in prometheus_output

    @pytest.mark.asyncio
    async def test_histogram_count_and_sum(self, metrics: MemoryMetrics) -> None:
        """Should track histogram count and sum."""
        await metrics.observe_retrieval_latency(0.1)
        await metrics.observe_retrieval_latency(0.2)
        await metrics.observe_retrieval_latency(0.3)

        prometheus_output = await metrics.get_prometheus_format()

        assert "memory_retrieval_latency_seconds_count 3" in prometheus_output
        assert "memory_retrieval_latency_seconds_sum 0.6" in prometheus_output


class TestLabelParsing:
    """Tests for label parsing."""

    @pytest.mark.asyncio
    async def test_parse_labels_in_output(self, metrics: MemoryMetrics) -> None:
        """Should correctly parse labels in output."""
        await metrics.inc_operations("get", "episodic", "project", True)

        all_metrics = await metrics.get_all_metrics()
        op_metric = next(
            (m for m in all_metrics if m.name == "memory_operations_total"), None
        )

        assert op_metric is not None
        assert op_metric.labels["operation"] == "get"
        assert op_metric.labels["memory_type"] == "episodic"
        assert op_metric.labels["scope"] == "project"
        assert op_metric.labels["success"] == "true"


class TestEdgeCases:
    """Tests for edge cases."""

    @pytest.mark.asyncio
    async def test_empty_metrics(self, metrics: MemoryMetrics) -> None:
        """Should handle empty metrics gracefully."""
        summary = await metrics.get_summary()

        assert summary["total_operations"] == 0
        assert summary["operation_success_rate"] == 1.0  # Default when no ops
        assert summary["cache_hit_rate"] == 0.0
        assert summary["avg_retrieval_latency_ms"] == 0.0

    @pytest.mark.asyncio
    async def test_concurrent_operations(self, metrics: MemoryMetrics) -> None:
        """Should handle concurrent operations safely."""
        import asyncio

        async def increment_ops() -> None:
            for _ in range(100):
                await metrics.inc_operations("get", "working", None, True)

        # Run multiple concurrent tasks
        await asyncio.gather(
            increment_ops(),
            increment_ops(),
            increment_ops(),
        )

        summary = await metrics.get_summary()
        assert summary["total_operations"] == 300

    @pytest.mark.asyncio
    async def test_prometheus_format_empty(self, metrics: MemoryMetrics) -> None:
        """Should return valid format with no metrics."""
        prometheus_output = await metrics.get_prometheus_format()

        # Should just have histogram bucket definitions
        assert "# TYPE memory_retrieval_latency_seconds histogram" in prometheus_output
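Besides the Prometheus export, get_summary() and get_cache_stats() lend themselves to ad-hoc health reporting. A sketch of a periodic logging task built on them; the task itself is illustrative and not part of this commit:

import logging

from app.services.memory.metrics import get_memory_metrics

logger = logging.getLogger(__name__)


async def log_memory_health() -> None:
    # Hypothetical periodic task: surface a few headline numbers in the logs.
    metrics = await get_memory_metrics()
    summary = await metrics.get_summary()
    cache_stats = await metrics.get_cache_stats()

    logger.info(
        "memory ops=%s success_rate=%.2f cache_hit_rate=%.2f",
        summary["total_operations"],
        summary["operation_success_rate"],
        summary["cache_hit_rate"],
    )
    for cache_type, data in cache_stats.items():
        logger.info(
            "cache %s hit_rate=%.2f total=%s",
            cache_type,
            data["hit_rate"],
            data["total"],
        )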