""" Tests for Safety Metrics Collector. Tests cover: - MetricType, MetricValue, HistogramBucket data structures - SafetyMetrics counters, gauges, histograms - Prometheus format export - Summary and reset operations - Singleton pattern and convenience functions """ import pytest import pytest_asyncio from app.services.safety.metrics.collector import ( HistogramBucket, MetricType, MetricValue, SafetyMetrics, get_safety_metrics, record_mcp_call, record_validation, ) class TestMetricType: """Tests for MetricType enum.""" def test_metric_types_exist(self): """Test all metric types are defined.""" assert MetricType.COUNTER == "counter" assert MetricType.GAUGE == "gauge" assert MetricType.HISTOGRAM == "histogram" def test_metric_type_is_string(self): """Test MetricType values are strings.""" assert isinstance(MetricType.COUNTER.value, str) assert isinstance(MetricType.GAUGE.value, str) assert isinstance(MetricType.HISTOGRAM.value, str) class TestMetricValue: """Tests for MetricValue dataclass.""" def test_metric_value_creation(self): """Test creating a metric value.""" mv = MetricValue( name="test_metric", metric_type=MetricType.COUNTER, value=42.0, labels={"env": "test"}, ) assert mv.name == "test_metric" assert mv.metric_type == MetricType.COUNTER assert mv.value == 42.0 assert mv.labels == {"env": "test"} assert mv.timestamp is not None def test_metric_value_defaults(self): """Test metric value default values.""" mv = MetricValue( name="test", metric_type=MetricType.GAUGE, value=0.0, ) assert mv.labels == {} assert mv.timestamp is not None class TestHistogramBucket: """Tests for HistogramBucket dataclass.""" def test_histogram_bucket_creation(self): """Test creating a histogram bucket.""" bucket = HistogramBucket(le=0.5, count=10) assert bucket.le == 0.5 assert bucket.count == 10 def test_histogram_bucket_defaults(self): """Test histogram bucket default count.""" bucket = HistogramBucket(le=1.0) assert bucket.le == 1.0 assert bucket.count == 0 def test_histogram_bucket_infinity(self): """Test histogram bucket with infinity.""" bucket = HistogramBucket(le=float("inf")) assert bucket.le == float("inf") class TestSafetyMetricsCounters: """Tests for SafetyMetrics counter methods.""" @pytest_asyncio.fixture async def metrics(self): """Create fresh metrics instance.""" return SafetyMetrics() @pytest.mark.asyncio async def test_inc_validations(self, metrics): """Test incrementing validations counter.""" await metrics.inc_validations("allow") await metrics.inc_validations("allow") await metrics.inc_validations("deny", agent_id="agent-1") summary = await metrics.get_summary() assert summary["total_validations"] == 3 assert summary["denied_validations"] == 1 @pytest.mark.asyncio async def test_inc_approvals_requested(self, metrics): """Test incrementing approval requests counter.""" await metrics.inc_approvals_requested("normal") await metrics.inc_approvals_requested("urgent") await metrics.inc_approvals_requested() # default summary = await metrics.get_summary() assert summary["approval_requests"] == 3 @pytest.mark.asyncio async def test_inc_approvals_granted(self, metrics): """Test incrementing approvals granted counter.""" await metrics.inc_approvals_granted() await metrics.inc_approvals_granted() summary = await metrics.get_summary() assert summary["approvals_granted"] == 2 @pytest.mark.asyncio async def test_inc_approvals_denied(self, metrics): """Test incrementing approvals denied counter.""" await metrics.inc_approvals_denied("timeout") await metrics.inc_approvals_denied("policy") await metrics.inc_approvals_denied() # default manual summary = await metrics.get_summary() assert summary["approvals_denied"] == 3 @pytest.mark.asyncio async def test_inc_rate_limit_exceeded(self, metrics): """Test incrementing rate limit exceeded counter.""" await metrics.inc_rate_limit_exceeded("requests_per_minute") await metrics.inc_rate_limit_exceeded("tokens_per_hour") summary = await metrics.get_summary() assert summary["rate_limit_hits"] == 2 @pytest.mark.asyncio async def test_inc_budget_exceeded(self, metrics): """Test incrementing budget exceeded counter.""" await metrics.inc_budget_exceeded("daily_cost") await metrics.inc_budget_exceeded("monthly_tokens") summary = await metrics.get_summary() assert summary["budget_exceeded"] == 2 @pytest.mark.asyncio async def test_inc_loops_detected(self, metrics): """Test incrementing loops detected counter.""" await metrics.inc_loops_detected("repetition") await metrics.inc_loops_detected("pattern") summary = await metrics.get_summary() assert summary["loops_detected"] == 2 @pytest.mark.asyncio async def test_inc_emergency_events(self, metrics): """Test incrementing emergency events counter.""" await metrics.inc_emergency_events("pause", "project-1") await metrics.inc_emergency_events("stop", "agent-2") summary = await metrics.get_summary() assert summary["emergency_events"] == 2 @pytest.mark.asyncio async def test_inc_content_filtered(self, metrics): """Test incrementing content filtered counter.""" await metrics.inc_content_filtered("profanity", "blocked") await metrics.inc_content_filtered("pii", "redacted") summary = await metrics.get_summary() assert summary["content_filtered"] == 2 @pytest.mark.asyncio async def test_inc_checkpoints_created(self, metrics): """Test incrementing checkpoints created counter.""" await metrics.inc_checkpoints_created() await metrics.inc_checkpoints_created() await metrics.inc_checkpoints_created() summary = await metrics.get_summary() assert summary["checkpoints_created"] == 3 @pytest.mark.asyncio async def test_inc_rollbacks_executed(self, metrics): """Test incrementing rollbacks executed counter.""" await metrics.inc_rollbacks_executed(success=True) await metrics.inc_rollbacks_executed(success=False) summary = await metrics.get_summary() assert summary["rollbacks_executed"] == 2 @pytest.mark.asyncio async def test_inc_mcp_calls(self, metrics): """Test incrementing MCP calls counter.""" await metrics.inc_mcp_calls("search_knowledge", success=True) await metrics.inc_mcp_calls("run_code", success=False) summary = await metrics.get_summary() assert summary["mcp_calls"] == 2 class TestSafetyMetricsGauges: """Tests for SafetyMetrics gauge methods.""" @pytest_asyncio.fixture async def metrics(self): """Create fresh metrics instance.""" return SafetyMetrics() @pytest.mark.asyncio async def test_set_budget_remaining(self, metrics): """Test setting budget remaining gauge.""" await metrics.set_budget_remaining("project-1", "daily_cost", 50.0) all_metrics = await metrics.get_all_metrics() gauge_metrics = [m for m in all_metrics if m.name == "safety_budget_remaining"] assert len(gauge_metrics) == 1 assert gauge_metrics[0].value == 50.0 assert gauge_metrics[0].labels["scope"] == "project-1" assert gauge_metrics[0].labels["budget_type"] == "daily_cost" @pytest.mark.asyncio async def test_set_rate_limit_remaining(self, metrics): """Test setting rate limit remaining gauge.""" await metrics.set_rate_limit_remaining("agent-1", "requests_per_minute", 45) all_metrics = await metrics.get_all_metrics() gauge_metrics = [ m for m in all_metrics if m.name == "safety_rate_limit_remaining" ] assert len(gauge_metrics) == 1 assert gauge_metrics[0].value == 45.0 @pytest.mark.asyncio async def test_set_pending_approvals(self, metrics): """Test setting pending approvals gauge.""" await metrics.set_pending_approvals(5) summary = await metrics.get_summary() assert summary["pending_approvals"] == 5 @pytest.mark.asyncio async def test_set_active_checkpoints(self, metrics): """Test setting active checkpoints gauge.""" await metrics.set_active_checkpoints(3) summary = await metrics.get_summary() assert summary["active_checkpoints"] == 3 @pytest.mark.asyncio async def test_set_emergency_state(self, metrics): """Test setting emergency state gauge.""" await metrics.set_emergency_state("project-1", "normal") await metrics.set_emergency_state("project-2", "paused") await metrics.set_emergency_state("project-3", "stopped") await metrics.set_emergency_state("project-4", "unknown") all_metrics = await metrics.get_all_metrics() state_metrics = [m for m in all_metrics if m.name == "safety_emergency_state"] assert len(state_metrics) == 4 # Check state values values_by_scope = {m.labels["scope"]: m.value for m in state_metrics} assert values_by_scope["project-1"] == 0.0 # normal assert values_by_scope["project-2"] == 1.0 # paused assert values_by_scope["project-3"] == 2.0 # stopped assert values_by_scope["project-4"] == -1.0 # unknown class TestSafetyMetricsHistograms: """Tests for SafetyMetrics histogram methods.""" @pytest_asyncio.fixture async def metrics(self): """Create fresh metrics instance.""" return SafetyMetrics() @pytest.mark.asyncio async def test_observe_validation_latency(self, metrics): """Test observing validation latency.""" await metrics.observe_validation_latency(0.05) await metrics.observe_validation_latency(0.15) await metrics.observe_validation_latency(0.5) all_metrics = await metrics.get_all_metrics() count_metric = next( (m for m in all_metrics if m.name == "validation_latency_seconds_count"), None, ) assert count_metric is not None assert count_metric.value == 3.0 sum_metric = next( (m for m in all_metrics if m.name == "validation_latency_seconds_sum"), None, ) assert sum_metric is not None assert abs(sum_metric.value - 0.7) < 0.001 @pytest.mark.asyncio async def test_observe_approval_latency(self, metrics): """Test observing approval latency.""" await metrics.observe_approval_latency(1.5) await metrics.observe_approval_latency(3.0) all_metrics = await metrics.get_all_metrics() count_metric = next( (m for m in all_metrics if m.name == "approval_latency_seconds_count"), None, ) assert count_metric is not None assert count_metric.value == 2.0 @pytest.mark.asyncio async def test_observe_mcp_execution_latency(self, metrics): """Test observing MCP execution latency.""" await metrics.observe_mcp_execution_latency(0.02) all_metrics = await metrics.get_all_metrics() count_metric = next( (m for m in all_metrics if m.name == "mcp_execution_latency_seconds_count"), None, ) assert count_metric is not None assert count_metric.value == 1.0 @pytest.mark.asyncio async def test_histogram_bucket_updates(self, metrics): """Test that histogram buckets are updated correctly.""" # Add values to test bucket distribution await metrics.observe_validation_latency(0.005) # <= 0.01 await metrics.observe_validation_latency(0.03) # <= 0.05 await metrics.observe_validation_latency(0.07) # <= 0.1 await metrics.observe_validation_latency(15.0) # <= inf prometheus = await metrics.get_prometheus_format() # Check that bucket counts are in output assert "validation_latency_seconds_bucket" in prometheus assert "le=" in prometheus class TestSafetyMetricsExport: """Tests for SafetyMetrics export methods.""" @pytest_asyncio.fixture async def metrics(self): """Create fresh metrics instance with some data.""" m = SafetyMetrics() # Add some counters await m.inc_validations("allow") await m.inc_validations("deny", agent_id="agent-1") # Add some gauges await m.set_pending_approvals(3) await m.set_budget_remaining("proj-1", "daily", 100.0) # Add some histogram values await m.observe_validation_latency(0.1) return m @pytest.mark.asyncio async def test_get_all_metrics(self, metrics): """Test getting all metrics.""" all_metrics = await metrics.get_all_metrics() assert len(all_metrics) > 0 assert all(isinstance(m, MetricValue) for m in all_metrics) # Check we have different types types = {m.metric_type for m in all_metrics} assert MetricType.COUNTER in types assert MetricType.GAUGE in types @pytest.mark.asyncio async def test_get_prometheus_format(self, metrics): """Test Prometheus format export.""" output = await metrics.get_prometheus_format() assert isinstance(output, str) assert "# TYPE" in output assert "counter" in output assert "gauge" in output assert "safety_validations_total" in output assert "safety_pending_approvals" in output @pytest.mark.asyncio async def test_prometheus_format_with_labels(self, metrics): """Test Prometheus format includes labels correctly.""" output = await metrics.get_prometheus_format() # Counter with labels assert "decision=allow" in output or "decision=deny" in output @pytest.mark.asyncio async def test_prometheus_format_histogram_buckets(self, metrics): """Test Prometheus format includes histogram buckets.""" output = await metrics.get_prometheus_format() assert "histogram" in output assert "_bucket" in output assert "le=" in output assert "+Inf" in output @pytest.mark.asyncio async def test_get_summary(self, metrics): """Test getting summary.""" summary = await metrics.get_summary() assert "total_validations" in summary assert "denied_validations" in summary assert "approval_requests" in summary assert "pending_approvals" in summary assert "active_checkpoints" in summary assert summary["total_validations"] == 2 assert summary["denied_validations"] == 1 assert summary["pending_approvals"] == 3 @pytest.mark.asyncio async def test_summary_empty_counters(self): """Test summary with no data.""" metrics = SafetyMetrics() summary = await metrics.get_summary() assert summary["total_validations"] == 0 assert summary["denied_validations"] == 0 assert summary["pending_approvals"] == 0 class TestSafetyMetricsReset: """Tests for SafetyMetrics reset.""" @pytest.mark.asyncio async def test_reset_clears_counters(self): """Test reset clears all counters.""" metrics = SafetyMetrics() await metrics.inc_validations("allow") await metrics.inc_approvals_granted() await metrics.set_pending_approvals(5) await metrics.observe_validation_latency(0.1) await metrics.reset() summary = await metrics.get_summary() assert summary["total_validations"] == 0 assert summary["approvals_granted"] == 0 assert summary["pending_approvals"] == 0 @pytest.mark.asyncio async def test_reset_reinitializes_histogram_buckets(self): """Test reset reinitializes histogram buckets.""" metrics = SafetyMetrics() await metrics.observe_validation_latency(0.1) await metrics.reset() # After reset, histogram buckets should be reinitialized prometheus = await metrics.get_prometheus_format() assert "validation_latency_seconds" in prometheus class TestParseLabels: """Tests for _parse_labels helper method.""" def test_parse_empty_labels(self): """Test parsing empty labels string.""" metrics = SafetyMetrics() result = metrics._parse_labels("") assert result == {} def test_parse_single_label(self): """Test parsing single label.""" metrics = SafetyMetrics() result = metrics._parse_labels("key=value") assert result == {"key": "value"} def test_parse_multiple_labels(self): """Test parsing multiple labels.""" metrics = SafetyMetrics() result = metrics._parse_labels("a=1,b=2,c=3") assert result == {"a": "1", "b": "2", "c": "3"} def test_parse_labels_with_spaces(self): """Test parsing labels with spaces.""" metrics = SafetyMetrics() result = metrics._parse_labels(" key = value , foo = bar ") assert result == {"key": "value", "foo": "bar"} def test_parse_labels_with_equals_in_value(self): """Test parsing labels with = in value.""" metrics = SafetyMetrics() result = metrics._parse_labels("query=a=b") assert result == {"query": "a=b"} def test_parse_invalid_label(self): """Test parsing invalid label without equals.""" metrics = SafetyMetrics() result = metrics._parse_labels("no_equals") assert result == {} class TestHistogramBucketInit: """Tests for histogram bucket initialization.""" def test_histogram_buckets_initialized(self): """Test that histogram buckets are initialized.""" metrics = SafetyMetrics() assert "validation_latency_seconds" in metrics._histogram_buckets assert "approval_latency_seconds" in metrics._histogram_buckets assert "mcp_execution_latency_seconds" in metrics._histogram_buckets def test_histogram_buckets_have_correct_values(self): """Test histogram buckets have correct boundary values.""" metrics = SafetyMetrics() buckets = metrics._histogram_buckets["validation_latency_seconds"] # Check first few and last bucket assert buckets[0].le == 0.01 assert buckets[1].le == 0.05 assert buckets[-1].le == float("inf") # Check all have zero initial count assert all(b.count == 0 for b in buckets) class TestSingletonAndConvenience: """Tests for singleton pattern and convenience functions.""" @pytest.mark.asyncio async def test_get_safety_metrics_returns_same_instance(self): """Test get_safety_metrics returns singleton.""" # Reset the module-level singleton for this test import app.services.safety.metrics.collector as collector_module collector_module._metrics = None m1 = await get_safety_metrics() m2 = await get_safety_metrics() assert m1 is m2 @pytest.mark.asyncio async def test_record_validation_convenience(self): """Test record_validation convenience function.""" import app.services.safety.metrics.collector as collector_module collector_module._metrics = None # Reset await record_validation("allow") await record_validation("deny", agent_id="test-agent") metrics = await get_safety_metrics() summary = await metrics.get_summary() assert summary["total_validations"] == 2 assert summary["denied_validations"] == 1 @pytest.mark.asyncio async def test_record_mcp_call_convenience(self): """Test record_mcp_call convenience function.""" import app.services.safety.metrics.collector as collector_module collector_module._metrics = None # Reset await record_mcp_call("search_knowledge", success=True, latency_ms=50) await record_mcp_call("run_code", success=False, latency_ms=100) metrics = await get_safety_metrics() summary = await metrics.get_summary() assert summary["mcp_calls"] == 2 class TestConcurrency: """Tests for concurrent metric updates.""" @pytest.mark.asyncio async def test_concurrent_counter_increments(self): """Test concurrent counter increments are safe.""" import asyncio metrics = SafetyMetrics() async def increment_many(): for _ in range(100): await metrics.inc_validations("allow") # Run 10 concurrent tasks each incrementing 100 times await asyncio.gather(*[increment_many() for _ in range(10)]) summary = await metrics.get_summary() assert summary["total_validations"] == 1000 @pytest.mark.asyncio async def test_concurrent_gauge_updates(self): """Test concurrent gauge updates are safe.""" import asyncio metrics = SafetyMetrics() async def update_gauge(value): await metrics.set_pending_approvals(value) # Run concurrent gauge updates await asyncio.gather(*[update_gauge(i) for i in range(100)]) # Final value should be one of the updates (last one wins) summary = await metrics.get_summary() assert 0 <= summary["pending_approvals"] < 100 @pytest.mark.asyncio async def test_concurrent_histogram_observations(self): """Test concurrent histogram observations are safe.""" import asyncio metrics = SafetyMetrics() async def observe_many(): for i in range(100): await metrics.observe_validation_latency(i / 1000) await asyncio.gather(*[observe_many() for _ in range(10)]) all_metrics = await metrics.get_all_metrics() count_metric = next( (m for m in all_metrics if m.name == "validation_latency_seconds_count"), None, ) assert count_metric is not None assert count_metric.value == 1000.0 class TestEdgeCases: """Tests for edge cases.""" @pytest.mark.asyncio async def test_very_large_counter_value(self): """Test handling very large counter values.""" metrics = SafetyMetrics() for _ in range(10000): await metrics.inc_validations("allow") summary = await metrics.get_summary() assert summary["total_validations"] == 10000 @pytest.mark.asyncio async def test_zero_and_negative_gauge_values(self): """Test zero and negative gauge values.""" metrics = SafetyMetrics() await metrics.set_budget_remaining("project", "cost", 0.0) await metrics.set_budget_remaining("project2", "cost", -10.0) all_metrics = await metrics.get_all_metrics() gauges = [m for m in all_metrics if m.name == "safety_budget_remaining"] values = {m.labels.get("scope"): m.value for m in gauges} assert values["project"] == 0.0 assert values["project2"] == -10.0 @pytest.mark.asyncio async def test_very_small_histogram_values(self): """Test very small histogram values.""" metrics = SafetyMetrics() await metrics.observe_validation_latency(0.0001) # 0.1ms all_metrics = await metrics.get_all_metrics() sum_metric = next( (m for m in all_metrics if m.name == "validation_latency_seconds_sum"), None, ) assert sum_metric is not None assert abs(sum_metric.value - 0.0001) < 0.00001 @pytest.mark.asyncio async def test_special_characters_in_labels(self): """Test special characters in label values.""" metrics = SafetyMetrics() await metrics.inc_validations("allow", agent_id="agent/with/slashes") all_metrics = await metrics.get_all_metrics() counters = [m for m in all_metrics if m.name == "safety_validations_total"] # Should have the metric with special chars assert len(counters) > 0 @pytest.mark.asyncio async def test_empty_histogram_export(self): """Test exporting histogram with no observations.""" metrics = SafetyMetrics() # No observations, but histogram buckets should still exist prometheus = await metrics.get_prometheus_format() assert "validation_latency_seconds" in prometheus assert "le=" in prometheus @pytest.mark.asyncio async def test_prometheus_format_empty_label_value(self): """Test Prometheus format with empty label metrics.""" metrics = SafetyMetrics() await metrics.inc_approvals_granted() # Uses empty string as label prometheus = await metrics.get_prometheus_format() assert "safety_approvals_granted_total" in prometheus @pytest.mark.asyncio async def test_multiple_resets(self): """Test multiple resets don't cause issues.""" metrics = SafetyMetrics() await metrics.inc_validations("allow") await metrics.reset() await metrics.reset() await metrics.reset() summary = await metrics.get_summary() assert summary["total_validations"] == 0