fast-next-template/backend/tests/services/safety/test_metrics.py
Felipe Cardoso 60ebeaa582 test(safety): add comprehensive tests for safety framework modules
Add tests to improve backend coverage from 85% to 93%:

- test_audit.py: 60 tests for AuditLogger (20% -> 99%)
  - Hash chain integrity, sanitization, retention, handlers
  - Fixed bug: hash chain modification after event creation
  - Fixed bug: verification not using correct prev_hash

- test_hitl.py: Tests for HITL manager (0% -> 100%)
- test_permissions.py: Tests for permissions manager (0% -> 99%)
- test_rollback.py: Tests for rollback manager (0% -> 100%)
- test_metrics.py: Tests for metrics collector (0% -> 100%)
- test_mcp_integration.py: Tests for MCP safety wrapper (0% -> 100%)
- test_validation.py: Additional cache and edge case tests (76% -> 100%)
- test_scoring.py: Lock cleanup and edge case tests (78% -> 91%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 19:41:54 +01:00


"""
Tests for Safety Metrics Collector.
Tests cover:
- MetricType, MetricValue, HistogramBucket data structures
- SafetyMetrics counters, gauges, histograms
- Prometheus format export
- Summary and reset operations
- Singleton pattern and convenience functions
"""

import pytest
import pytest_asyncio
from app.services.safety.metrics.collector import (
HistogramBucket,
MetricType,
MetricValue,
SafetyMetrics,
get_safety_metrics,
record_mcp_call,
record_validation,
)


class TestMetricType:
"""Tests for MetricType enum."""
def test_metric_types_exist(self):
"""Test all metric types are defined."""
assert MetricType.COUNTER == "counter"
assert MetricType.GAUGE == "gauge"
assert MetricType.HISTOGRAM == "histogram"
def test_metric_type_is_string(self):
"""Test MetricType values are strings."""
assert isinstance(MetricType.COUNTER.value, str)
assert isinstance(MetricType.GAUGE.value, str)
assert isinstance(MetricType.HISTOGRAM.value, str)


class TestMetricValue:
"""Tests for MetricValue dataclass."""
def test_metric_value_creation(self):
"""Test creating a metric value."""
mv = MetricValue(
name="test_metric",
metric_type=MetricType.COUNTER,
value=42.0,
labels={"env": "test"},
)
assert mv.name == "test_metric"
assert mv.metric_type == MetricType.COUNTER
assert mv.value == 42.0
assert mv.labels == {"env": "test"}
assert mv.timestamp is not None
def test_metric_value_defaults(self):
"""Test metric value default values."""
mv = MetricValue(
name="test",
metric_type=MetricType.GAUGE,
value=0.0,
)
assert mv.labels == {}
assert mv.timestamp is not None


class TestHistogramBucket:
"""Tests for HistogramBucket dataclass."""
def test_histogram_bucket_creation(self):
"""Test creating a histogram bucket."""
bucket = HistogramBucket(le=0.5, count=10)
assert bucket.le == 0.5
assert bucket.count == 10
def test_histogram_bucket_defaults(self):
"""Test histogram bucket default count."""
bucket = HistogramBucket(le=1.0)
assert bucket.le == 1.0
assert bucket.count == 0
def test_histogram_bucket_infinity(self):
"""Test histogram bucket with infinity."""
bucket = HistogramBucket(le=float("inf"))
assert bucket.le == float("inf")
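

# Prometheus-style histogram buckets are cumulative: an observation
# increments every bucket whose upper bound `le` is at or above the
# observed value, so the final +Inf bucket counts all observations.
# A minimal sketch of that update rule (illustrative only, not the
# collector's actual code):
def _observe_sketch(buckets: list[HistogramBucket], value: float) -> None:
    """Increment every cumulative bucket that admits `value`."""
    for bucket in buckets:
        if value <= bucket.le:
            bucket.count += 1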


class TestSafetyMetricsCounters:
"""Tests for SafetyMetrics counter methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance."""
return SafetyMetrics()
@pytest.mark.asyncio
async def test_inc_validations(self, metrics):
"""Test incrementing validations counter."""
await metrics.inc_validations("allow")
await metrics.inc_validations("allow")
await metrics.inc_validations("deny", agent_id="agent-1")
summary = await metrics.get_summary()
assert summary["total_validations"] == 3
assert summary["denied_validations"] == 1
@pytest.mark.asyncio
async def test_inc_approvals_requested(self, metrics):
"""Test incrementing approval requests counter."""
await metrics.inc_approvals_requested("normal")
await metrics.inc_approvals_requested("urgent")
await metrics.inc_approvals_requested() # default
summary = await metrics.get_summary()
assert summary["approval_requests"] == 3
@pytest.mark.asyncio
async def test_inc_approvals_granted(self, metrics):
"""Test incrementing approvals granted counter."""
await metrics.inc_approvals_granted()
await metrics.inc_approvals_granted()
summary = await metrics.get_summary()
assert summary["approvals_granted"] == 2
@pytest.mark.asyncio
async def test_inc_approvals_denied(self, metrics):
"""Test incrementing approvals denied counter."""
await metrics.inc_approvals_denied("timeout")
await metrics.inc_approvals_denied("policy")
        await metrics.inc_approvals_denied() # default reason: manual
summary = await metrics.get_summary()
assert summary["approvals_denied"] == 3
@pytest.mark.asyncio
async def test_inc_rate_limit_exceeded(self, metrics):
"""Test incrementing rate limit exceeded counter."""
await metrics.inc_rate_limit_exceeded("requests_per_minute")
await metrics.inc_rate_limit_exceeded("tokens_per_hour")
summary = await metrics.get_summary()
assert summary["rate_limit_hits"] == 2
@pytest.mark.asyncio
async def test_inc_budget_exceeded(self, metrics):
"""Test incrementing budget exceeded counter."""
await metrics.inc_budget_exceeded("daily_cost")
await metrics.inc_budget_exceeded("monthly_tokens")
summary = await metrics.get_summary()
assert summary["budget_exceeded"] == 2
@pytest.mark.asyncio
async def test_inc_loops_detected(self, metrics):
"""Test incrementing loops detected counter."""
await metrics.inc_loops_detected("repetition")
await metrics.inc_loops_detected("pattern")
summary = await metrics.get_summary()
assert summary["loops_detected"] == 2
@pytest.mark.asyncio
async def test_inc_emergency_events(self, metrics):
"""Test incrementing emergency events counter."""
await metrics.inc_emergency_events("pause", "project-1")
await metrics.inc_emergency_events("stop", "agent-2")
summary = await metrics.get_summary()
assert summary["emergency_events"] == 2
@pytest.mark.asyncio
async def test_inc_content_filtered(self, metrics):
"""Test incrementing content filtered counter."""
await metrics.inc_content_filtered("profanity", "blocked")
await metrics.inc_content_filtered("pii", "redacted")
summary = await metrics.get_summary()
assert summary["content_filtered"] == 2
@pytest.mark.asyncio
async def test_inc_checkpoints_created(self, metrics):
"""Test incrementing checkpoints created counter."""
await metrics.inc_checkpoints_created()
await metrics.inc_checkpoints_created()
await metrics.inc_checkpoints_created()
summary = await metrics.get_summary()
assert summary["checkpoints_created"] == 3
@pytest.mark.asyncio
async def test_inc_rollbacks_executed(self, metrics):
"""Test incrementing rollbacks executed counter."""
await metrics.inc_rollbacks_executed(success=True)
await metrics.inc_rollbacks_executed(success=False)
summary = await metrics.get_summary()
assert summary["rollbacks_executed"] == 2
@pytest.mark.asyncio
async def test_inc_mcp_calls(self, metrics):
"""Test incrementing MCP calls counter."""
await metrics.inc_mcp_calls("search_knowledge", success=True)
await metrics.inc_mcp_calls("run_code", success=False)
summary = await metrics.get_summary()
assert summary["mcp_calls"] == 2
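

# The labeled counters exercised above are presumably stored in a dict
# keyed by metric name plus a serialized "k=v,k=v" label string, which is
# exactly what the _parse_labels helper tested further down would invert.
# A sketch of that storage pattern (the names here are illustrative, not
# the collector's):
_counter_sketch: dict[str, float] = {}

def _inc_sketch(name: str, labels: str) -> None:
    """Bump a counter identified by its name and serialized labels."""
    key = f"{name}{{{labels}}}"
    _counter_sketch[key] = _counter_sketch.get(key, 0.0) + 1.0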


class TestSafetyMetricsGauges:
"""Tests for SafetyMetrics gauge methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance."""
return SafetyMetrics()
@pytest.mark.asyncio
async def test_set_budget_remaining(self, metrics):
"""Test setting budget remaining gauge."""
await metrics.set_budget_remaining("project-1", "daily_cost", 50.0)
all_metrics = await metrics.get_all_metrics()
gauge_metrics = [m for m in all_metrics if m.name == "safety_budget_remaining"]
assert len(gauge_metrics) == 1
assert gauge_metrics[0].value == 50.0
assert gauge_metrics[0].labels["scope"] == "project-1"
assert gauge_metrics[0].labels["budget_type"] == "daily_cost"
@pytest.mark.asyncio
async def test_set_rate_limit_remaining(self, metrics):
"""Test setting rate limit remaining gauge."""
await metrics.set_rate_limit_remaining("agent-1", "requests_per_minute", 45)
all_metrics = await metrics.get_all_metrics()
gauge_metrics = [
m for m in all_metrics if m.name == "safety_rate_limit_remaining"
]
assert len(gauge_metrics) == 1
assert gauge_metrics[0].value == 45.0
@pytest.mark.asyncio
async def test_set_pending_approvals(self, metrics):
"""Test setting pending approvals gauge."""
await metrics.set_pending_approvals(5)
summary = await metrics.get_summary()
assert summary["pending_approvals"] == 5
@pytest.mark.asyncio
async def test_set_active_checkpoints(self, metrics):
"""Test setting active checkpoints gauge."""
await metrics.set_active_checkpoints(3)
summary = await metrics.get_summary()
assert summary["active_checkpoints"] == 3
@pytest.mark.asyncio
async def test_set_emergency_state(self, metrics):
"""Test setting emergency state gauge."""
await metrics.set_emergency_state("project-1", "normal")
await metrics.set_emergency_state("project-2", "paused")
await metrics.set_emergency_state("project-3", "stopped")
await metrics.set_emergency_state("project-4", "unknown")
all_metrics = await metrics.get_all_metrics()
state_metrics = [m for m in all_metrics if m.name == "safety_emergency_state"]
assert len(state_metrics) == 4
# Check state values
values_by_scope = {m.labels["scope"]: m.value for m in state_metrics}
assert values_by_scope["project-1"] == 0.0 # normal
assert values_by_scope["project-2"] == 1.0 # paused
assert values_by_scope["project-3"] == 2.0 # stopped
assert values_by_scope["project-4"] == -1.0 # unknown


class TestSafetyMetricsHistograms:
"""Tests for SafetyMetrics histogram methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance."""
return SafetyMetrics()
@pytest.mark.asyncio
async def test_observe_validation_latency(self, metrics):
"""Test observing validation latency."""
await metrics.observe_validation_latency(0.05)
await metrics.observe_validation_latency(0.15)
await metrics.observe_validation_latency(0.5)
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 3.0
sum_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_sum"),
None,
)
assert sum_metric is not None
assert abs(sum_metric.value - 0.7) < 0.001
@pytest.mark.asyncio
async def test_observe_approval_latency(self, metrics):
"""Test observing approval latency."""
await metrics.observe_approval_latency(1.5)
await metrics.observe_approval_latency(3.0)
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "approval_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 2.0
@pytest.mark.asyncio
async def test_observe_mcp_execution_latency(self, metrics):
"""Test observing MCP execution latency."""
await metrics.observe_mcp_execution_latency(0.02)
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "mcp_execution_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 1.0
@pytest.mark.asyncio
async def test_histogram_bucket_updates(self, metrics):
"""Test that histogram buckets are updated correctly."""
# Add values to test bucket distribution
await metrics.observe_validation_latency(0.005) # <= 0.01
await metrics.observe_validation_latency(0.03) # <= 0.05
await metrics.observe_validation_latency(0.07) # <= 0.1
await metrics.observe_validation_latency(15.0) # <= inf
prometheus = await metrics.get_prometheus_format()
# Check that bucket counts are in output
assert "validation_latency_seconds_bucket" in prometheus
assert "le=" in prometheus


class TestSafetyMetricsExport:
"""Tests for SafetyMetrics export methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance with some data."""
m = SafetyMetrics()
# Add some counters
await m.inc_validations("allow")
await m.inc_validations("deny", agent_id="agent-1")
# Add some gauges
await m.set_pending_approvals(3)
await m.set_budget_remaining("proj-1", "daily", 100.0)
# Add some histogram values
await m.observe_validation_latency(0.1)
return m
@pytest.mark.asyncio
async def test_get_all_metrics(self, metrics):
"""Test getting all metrics."""
all_metrics = await metrics.get_all_metrics()
assert len(all_metrics) > 0
assert all(isinstance(m, MetricValue) for m in all_metrics)
# Check we have different types
types = {m.metric_type for m in all_metrics}
assert MetricType.COUNTER in types
assert MetricType.GAUGE in types
@pytest.mark.asyncio
async def test_get_prometheus_format(self, metrics):
"""Test Prometheus format export."""
output = await metrics.get_prometheus_format()
assert isinstance(output, str)
assert "# TYPE" in output
assert "counter" in output
assert "gauge" in output
assert "safety_validations_total" in output
assert "safety_pending_approvals" in output
@pytest.mark.asyncio
async def test_prometheus_format_with_labels(self, metrics):
"""Test Prometheus format includes labels correctly."""
output = await metrics.get_prometheus_format()
# Counter with labels
assert "decision=allow" in output or "decision=deny" in output
@pytest.mark.asyncio
async def test_prometheus_format_histogram_buckets(self, metrics):
"""Test Prometheus format includes histogram buckets."""
output = await metrics.get_prometheus_format()
assert "histogram" in output
assert "_bucket" in output
assert "le=" in output
assert "+Inf" in output
@pytest.mark.asyncio
async def test_get_summary(self, metrics):
"""Test getting summary."""
summary = await metrics.get_summary()
assert "total_validations" in summary
assert "denied_validations" in summary
assert "approval_requests" in summary
assert "pending_approvals" in summary
assert "active_checkpoints" in summary
assert summary["total_validations"] == 2
assert summary["denied_validations"] == 1
assert summary["pending_approvals"] == 3
@pytest.mark.asyncio
async def test_summary_empty_counters(self):
"""Test summary with no data."""
metrics = SafetyMetrics()
summary = await metrics.get_summary()
assert summary["total_validations"] == 0
assert summary["denied_validations"] == 0
assert summary["pending_approvals"] == 0
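

# The export assertions above (TYPE lines, label pairs, cumulative
# buckets, the +Inf bound) imply exposition output shaped roughly like
# this (exact spacing, quoting, and values are assumptions, not the
# collector's verbatim output):
#
#   # TYPE safety_validations_total counter
#   safety_validations_total{decision=allow} 1
#   safety_validations_total{agent_id=agent-1,decision=deny} 1
#   # TYPE safety_pending_approvals gauge
#   safety_pending_approvals 3
#   # TYPE validation_latency_seconds histogram
#   validation_latency_seconds_bucket{le=0.1} 1
#   validation_latency_seconds_bucket{le=+Inf} 1
#   validation_latency_seconds_sum 0.1
#   validation_latency_seconds_count 1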


class TestSafetyMetricsReset:
"""Tests for SafetyMetrics reset."""
@pytest.mark.asyncio
async def test_reset_clears_counters(self):
"""Test reset clears all counters."""
metrics = SafetyMetrics()
await metrics.inc_validations("allow")
await metrics.inc_approvals_granted()
await metrics.set_pending_approvals(5)
await metrics.observe_validation_latency(0.1)
await metrics.reset()
summary = await metrics.get_summary()
assert summary["total_validations"] == 0
assert summary["approvals_granted"] == 0
assert summary["pending_approvals"] == 0
@pytest.mark.asyncio
async def test_reset_reinitializes_histogram_buckets(self):
"""Test reset reinitializes histogram buckets."""
metrics = SafetyMetrics()
await metrics.observe_validation_latency(0.1)
await metrics.reset()
# After reset, histogram buckets should be reinitialized
prometheus = await metrics.get_prometheus_format()
assert "validation_latency_seconds" in prometheus


class TestParseLabels:
"""Tests for _parse_labels helper method."""
def test_parse_empty_labels(self):
"""Test parsing empty labels string."""
metrics = SafetyMetrics()
result = metrics._parse_labels("")
assert result == {}
def test_parse_single_label(self):
"""Test parsing single label."""
metrics = SafetyMetrics()
result = metrics._parse_labels("key=value")
assert result == {"key": "value"}
def test_parse_multiple_labels(self):
"""Test parsing multiple labels."""
metrics = SafetyMetrics()
result = metrics._parse_labels("a=1,b=2,c=3")
assert result == {"a": "1", "b": "2", "c": "3"}
def test_parse_labels_with_spaces(self):
"""Test parsing labels with spaces."""
metrics = SafetyMetrics()
result = metrics._parse_labels(" key = value , foo = bar ")
assert result == {"key": "value", "foo": "bar"}
def test_parse_labels_with_equals_in_value(self):
"""Test parsing labels with = in value."""
metrics = SafetyMetrics()
result = metrics._parse_labels("query=a=b")
assert result == {"query": "a=b"}
def test_parse_invalid_label(self):
"""Test parsing invalid label without equals."""
metrics = SafetyMetrics()
result = metrics._parse_labels("no_equals")
assert result == {}
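

# A _parse_labels implementation consistent with all six cases above
# (a sketch; the real helper may differ in details):
def _parse_labels_sketch(label_str: str) -> dict[str, str]:
    """Parse "a=1,b=2" into a dict, splitting each pair on the first '='."""
    labels: dict[str, str] = {}
    for part in label_str.split(","):
        key, sep, value = part.partition("=")
        if sep:  # tokens without '=' are ignored
            labels[key.strip()] = value.strip()
    return labels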


class TestHistogramBucketInit:
"""Tests for histogram bucket initialization."""
def test_histogram_buckets_initialized(self):
"""Test that histogram buckets are initialized."""
metrics = SafetyMetrics()
assert "validation_latency_seconds" in metrics._histogram_buckets
assert "approval_latency_seconds" in metrics._histogram_buckets
assert "mcp_execution_latency_seconds" in metrics._histogram_buckets
def test_histogram_buckets_have_correct_values(self):
"""Test histogram buckets have correct boundary values."""
metrics = SafetyMetrics()
buckets = metrics._histogram_buckets["validation_latency_seconds"]
# Check first few and last bucket
assert buckets[0].le == 0.01
assert buckets[1].le == 0.05
assert buckets[-1].le == float("inf")
# Check all have zero initial count
assert all(b.count == 0 for b in buckets)


class TestSingletonAndConvenience:
"""Tests for singleton pattern and convenience functions."""
@pytest.mark.asyncio
async def test_get_safety_metrics_returns_same_instance(self):
"""Test get_safety_metrics returns singleton."""
# Reset the module-level singleton for this test
import app.services.safety.metrics.collector as collector_module
collector_module._metrics = None
m1 = await get_safety_metrics()
m2 = await get_safety_metrics()
assert m1 is m2
@pytest.mark.asyncio
async def test_record_validation_convenience(self):
"""Test record_validation convenience function."""
import app.services.safety.metrics.collector as collector_module
collector_module._metrics = None # Reset
await record_validation("allow")
await record_validation("deny", agent_id="test-agent")
metrics = await get_safety_metrics()
summary = await metrics.get_summary()
assert summary["total_validations"] == 2
assert summary["denied_validations"] == 1
@pytest.mark.asyncio
async def test_record_mcp_call_convenience(self):
"""Test record_mcp_call convenience function."""
import app.services.safety.metrics.collector as collector_module
collector_module._metrics = None # Reset
await record_mcp_call("search_knowledge", success=True, latency_ms=50)
await record_mcp_call("run_code", success=False, latency_ms=100)
metrics = await get_safety_metrics()
summary = await metrics.get_summary()
assert summary["mcp_calls"] == 2
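

# The reset-then-compare pattern above implies a lazily created
# module-level singleton, sketched here with distinct names to avoid
# shadowing the imported get_safety_metrics (the real module may also
# guard first creation with a lock):
_singleton_sketch = None  # illustrative module-level cache

async def _get_metrics_sketch() -> SafetyMetrics:
    global _singleton_sketch
    if _singleton_sketch is None:
        _singleton_sketch = SafetyMetrics()
    return _singleton_sketch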


class TestConcurrency:
"""Tests for concurrent metric updates."""
@pytest.mark.asyncio
async def test_concurrent_counter_increments(self):
"""Test concurrent counter increments are safe."""
import asyncio
metrics = SafetyMetrics()
async def increment_many():
for _ in range(100):
await metrics.inc_validations("allow")
# Run 10 concurrent tasks each incrementing 100 times
await asyncio.gather(*[increment_many() for _ in range(10)])
summary = await metrics.get_summary()
assert summary["total_validations"] == 1000
@pytest.mark.asyncio
async def test_concurrent_gauge_updates(self):
"""Test concurrent gauge updates are safe."""
import asyncio
metrics = SafetyMetrics()
async def update_gauge(value):
await metrics.set_pending_approvals(value)
# Run concurrent gauge updates
await asyncio.gather(*[update_gauge(i) for i in range(100)])
        # Whichever update runs last wins; assert the final value is one of the submitted values (0-99)
summary = await metrics.get_summary()
assert 0 <= summary["pending_approvals"] < 100
@pytest.mark.asyncio
async def test_concurrent_histogram_observations(self):
"""Test concurrent histogram observations are safe."""
import asyncio
metrics = SafetyMetrics()
async def observe_many():
for i in range(100):
await metrics.observe_validation_latency(i / 1000)
await asyncio.gather(*[observe_many() for _ in range(10)])
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 1000.0
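

# These totals come out exact because updates never interleave
# mid-increment, whether the collector relies on asyncio's
# single-threaded scheduling or on an explicit lock. A minimal sketch
# of the locked variant (illustrative, not the collector's code):
import asyncio

class _LockedCounterSketch:
    """Serialize increments behind an asyncio.Lock."""

    def __init__(self) -> None:
        self._value = 0.0
        self._lock = asyncio.Lock()

    async def inc(self) -> None:
        async with self._lock:
            self._value += 1.0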


class TestEdgeCases:
"""Tests for edge cases."""
@pytest.mark.asyncio
async def test_very_large_counter_value(self):
"""Test handling very large counter values."""
metrics = SafetyMetrics()
for _ in range(10000):
await metrics.inc_validations("allow")
summary = await metrics.get_summary()
assert summary["total_validations"] == 10000
@pytest.mark.asyncio
async def test_zero_and_negative_gauge_values(self):
"""Test zero and negative gauge values."""
metrics = SafetyMetrics()
await metrics.set_budget_remaining("project", "cost", 0.0)
await metrics.set_budget_remaining("project2", "cost", -10.0)
all_metrics = await metrics.get_all_metrics()
gauges = [m for m in all_metrics if m.name == "safety_budget_remaining"]
values = {m.labels.get("scope"): m.value for m in gauges}
assert values["project"] == 0.0
assert values["project2"] == -10.0
@pytest.mark.asyncio
async def test_very_small_histogram_values(self):
"""Test very small histogram values."""
metrics = SafetyMetrics()
await metrics.observe_validation_latency(0.0001) # 0.1ms
all_metrics = await metrics.get_all_metrics()
sum_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_sum"),
None,
)
assert sum_metric is not None
assert abs(sum_metric.value - 0.0001) < 0.00001
@pytest.mark.asyncio
async def test_special_characters_in_labels(self):
"""Test special characters in label values."""
metrics = SafetyMetrics()
await metrics.inc_validations("allow", agent_id="agent/with/slashes")
all_metrics = await metrics.get_all_metrics()
counters = [m for m in all_metrics if m.name == "safety_validations_total"]
# Should have the metric with special chars
assert len(counters) > 0
@pytest.mark.asyncio
async def test_empty_histogram_export(self):
"""Test exporting histogram with no observations."""
metrics = SafetyMetrics()
# No observations, but histogram buckets should still exist
prometheus = await metrics.get_prometheus_format()
assert "validation_latency_seconds" in prometheus
assert "le=" in prometheus
@pytest.mark.asyncio
async def test_prometheus_format_empty_label_value(self):
"""Test Prometheus format with empty label metrics."""
metrics = SafetyMetrics()
await metrics.inc_approvals_granted() # Uses empty string as label
prometheus = await metrics.get_prometheus_format()
assert "safety_approvals_granted_total" in prometheus
@pytest.mark.asyncio
async def test_multiple_resets(self):
"""Test multiple resets don't cause issues."""
metrics = SafetyMetrics()
await metrics.inc_validations("allow")
await metrics.reset()
await metrics.reset()
await metrics.reset()
summary = await metrics.get_summary()
assert summary["total_validations"] == 0