fast-next-template/backend/tests/services/safety/test_metrics.py
Felipe Cardoso 60ebeaa582 test(safety): add comprehensive tests for safety framework modules
Add tests to improve backend coverage from 85% to 93%:

- test_audit.py: 60 tests for AuditLogger (20% -> 99%)
  - Hash chain integrity, sanitization, retention, handlers
  - Fixed bug: hash chain modification after event creation
  - Fixed bug: verification not using correct prev_hash

- test_hitl.py: Tests for HITL manager (0% -> 100%)
- test_permissions.py: Tests for permissions manager (0% -> 99%)
- test_rollback.py: Tests for rollback manager (0% -> 100%)
- test_metrics.py: Tests for metrics collector (0% -> 100%)
- test_mcp_integration.py: Tests for MCP safety wrapper (0% -> 100%)
- test_validation.py: Additional cache and edge case tests (76% -> 100%)
- test_scoring.py: Lock cleanup and edge case tests (78% -> 91%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 19:41:54 +01:00


"""
Tests for Safety Metrics Collector.
Tests cover:
- MetricType, MetricValue, HistogramBucket data structures
- SafetyMetrics counters, gauges, histograms
- Prometheus format export
- Summary and reset operations
- Singleton pattern and convenience functions
"""

import pytest
import pytest_asyncio
from app.services.safety.metrics.collector import (
HistogramBucket,
MetricType,
MetricValue,
SafetyMetrics,
get_safety_metrics,
record_mcp_call,
record_validation,
)


class TestMetricType:
"""Tests for MetricType enum."""
def test_metric_types_exist(self):
"""Test all metric types are defined."""
assert MetricType.COUNTER == "counter"
assert MetricType.GAUGE == "gauge"
assert MetricType.HISTOGRAM == "histogram"
def test_metric_type_is_string(self):
"""Test MetricType values are strings."""
assert isinstance(MetricType.COUNTER.value, str)
assert isinstance(MetricType.GAUGE.value, str)
assert isinstance(MetricType.HISTOGRAM.value, str)


class TestMetricValue:
"""Tests for MetricValue dataclass."""
def test_metric_value_creation(self):
"""Test creating a metric value."""
mv = MetricValue(
name="test_metric",
metric_type=MetricType.COUNTER,
value=42.0,
labels={"env": "test"},
)
assert mv.name == "test_metric"
assert mv.metric_type == MetricType.COUNTER
assert mv.value == 42.0
assert mv.labels == {"env": "test"}
assert mv.timestamp is not None
def test_metric_value_defaults(self):
"""Test metric value default values."""
mv = MetricValue(
name="test",
metric_type=MetricType.GAUGE,
value=0.0,
)
assert mv.labels == {}
assert mv.timestamp is not None


class TestHistogramBucket:
"""Tests for HistogramBucket dataclass."""
def test_histogram_bucket_creation(self):
"""Test creating a histogram bucket."""
bucket = HistogramBucket(le=0.5, count=10)
assert bucket.le == 0.5
assert bucket.count == 10
def test_histogram_bucket_defaults(self):
"""Test histogram bucket default count."""
bucket = HistogramBucket(le=1.0)
assert bucket.le == 1.0
assert bucket.count == 0
def test_histogram_bucket_infinity(self):
"""Test histogram bucket with infinity."""
bucket = HistogramBucket(le=float("inf"))
assert bucket.le == float("inf")
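

# Prometheus-style histogram buckets are cumulative: an observation
# increments every bucket whose upper bound `le` is at or above the
# observed value, so the final +Inf bucket counts all observations.
# A minimal sketch of that update rule (illustrative only, not the
# collector's actual code):
def _observe_sketch(buckets: list[HistogramBucket], value: float) -> None:
    """Increment every cumulative bucket that admits `value`."""
    for bucket in buckets:
        if value <= bucket.le:
            bucket.count += 1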


class TestSafetyMetricsCounters:
"""Tests for SafetyMetrics counter methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance."""
return SafetyMetrics()
@pytest.mark.asyncio
async def test_inc_validations(self, metrics):
"""Test incrementing validations counter."""
await metrics.inc_validations("allow")
await metrics.inc_validations("allow")
await metrics.inc_validations("deny", agent_id="agent-1")
summary = await metrics.get_summary()
assert summary["total_validations"] == 3
assert summary["denied_validations"] == 1
@pytest.mark.asyncio
async def test_inc_approvals_requested(self, metrics):
"""Test incrementing approval requests counter."""
await metrics.inc_approvals_requested("normal")
await metrics.inc_approvals_requested("urgent")
await metrics.inc_approvals_requested() # default
summary = await metrics.get_summary()
assert summary["approval_requests"] == 3
@pytest.mark.asyncio
async def test_inc_approvals_granted(self, metrics):
"""Test incrementing approvals granted counter."""
await metrics.inc_approvals_granted()
await metrics.inc_approvals_granted()
summary = await metrics.get_summary()
assert summary["approvals_granted"] == 2
@pytest.mark.asyncio
async def test_inc_approvals_denied(self, metrics):
"""Test incrementing approvals denied counter."""
await metrics.inc_approvals_denied("timeout")
await metrics.inc_approvals_denied("policy")
        await metrics.inc_approvals_denied() # default reason: manual
summary = await metrics.get_summary()
assert summary["approvals_denied"] == 3
@pytest.mark.asyncio
async def test_inc_rate_limit_exceeded(self, metrics):
"""Test incrementing rate limit exceeded counter."""
await metrics.inc_rate_limit_exceeded("requests_per_minute")
await metrics.inc_rate_limit_exceeded("tokens_per_hour")
summary = await metrics.get_summary()
assert summary["rate_limit_hits"] == 2
@pytest.mark.asyncio
async def test_inc_budget_exceeded(self, metrics):
"""Test incrementing budget exceeded counter."""
await metrics.inc_budget_exceeded("daily_cost")
await metrics.inc_budget_exceeded("monthly_tokens")
summary = await metrics.get_summary()
assert summary["budget_exceeded"] == 2
@pytest.mark.asyncio
async def test_inc_loops_detected(self, metrics):
"""Test incrementing loops detected counter."""
await metrics.inc_loops_detected("repetition")
await metrics.inc_loops_detected("pattern")
summary = await metrics.get_summary()
assert summary["loops_detected"] == 2
@pytest.mark.asyncio
async def test_inc_emergency_events(self, metrics):
"""Test incrementing emergency events counter."""
await metrics.inc_emergency_events("pause", "project-1")
await metrics.inc_emergency_events("stop", "agent-2")
summary = await metrics.get_summary()
assert summary["emergency_events"] == 2
@pytest.mark.asyncio
async def test_inc_content_filtered(self, metrics):
"""Test incrementing content filtered counter."""
await metrics.inc_content_filtered("profanity", "blocked")
await metrics.inc_content_filtered("pii", "redacted")
summary = await metrics.get_summary()
assert summary["content_filtered"] == 2
@pytest.mark.asyncio
async def test_inc_checkpoints_created(self, metrics):
"""Test incrementing checkpoints created counter."""
await metrics.inc_checkpoints_created()
await metrics.inc_checkpoints_created()
await metrics.inc_checkpoints_created()
summary = await metrics.get_summary()
assert summary["checkpoints_created"] == 3
@pytest.mark.asyncio
async def test_inc_rollbacks_executed(self, metrics):
"""Test incrementing rollbacks executed counter."""
await metrics.inc_rollbacks_executed(success=True)
await metrics.inc_rollbacks_executed(success=False)
summary = await metrics.get_summary()
assert summary["rollbacks_executed"] == 2
@pytest.mark.asyncio
async def test_inc_mcp_calls(self, metrics):
"""Test incrementing MCP calls counter."""
await metrics.inc_mcp_calls("search_knowledge", success=True)
await metrics.inc_mcp_calls("run_code", success=False)
summary = await metrics.get_summary()
assert summary["mcp_calls"] == 2
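

# The labeled counters exercised above are presumably stored in a dict
# keyed by metric name plus a serialized "k=v,k=v" label string, which is
# exactly what the _parse_labels helper tested further down would invert.
# A sketch of that storage pattern (the names here are illustrative, not
# the collector's):
_counter_sketch: dict[str, float] = {}

def _inc_sketch(name: str, labels: str) -> None:
    """Bump a counter identified by its name and serialized labels."""
    key = f"{name}{{{labels}}}"
    _counter_sketch[key] = _counter_sketch.get(key, 0.0) + 1.0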


class TestSafetyMetricsGauges:
"""Tests for SafetyMetrics gauge methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance."""
return SafetyMetrics()
@pytest.mark.asyncio
async def test_set_budget_remaining(self, metrics):
"""Test setting budget remaining gauge."""
await metrics.set_budget_remaining("project-1", "daily_cost", 50.0)
all_metrics = await metrics.get_all_metrics()
gauge_metrics = [m for m in all_metrics if m.name == "safety_budget_remaining"]
assert len(gauge_metrics) == 1
assert gauge_metrics[0].value == 50.0
assert gauge_metrics[0].labels["scope"] == "project-1"
assert gauge_metrics[0].labels["budget_type"] == "daily_cost"
@pytest.mark.asyncio
async def test_set_rate_limit_remaining(self, metrics):
"""Test setting rate limit remaining gauge."""
await metrics.set_rate_limit_remaining("agent-1", "requests_per_minute", 45)
all_metrics = await metrics.get_all_metrics()
gauge_metrics = [
m for m in all_metrics if m.name == "safety_rate_limit_remaining"
]
assert len(gauge_metrics) == 1
assert gauge_metrics[0].value == 45.0
@pytest.mark.asyncio
async def test_set_pending_approvals(self, metrics):
"""Test setting pending approvals gauge."""
await metrics.set_pending_approvals(5)
summary = await metrics.get_summary()
assert summary["pending_approvals"] == 5
@pytest.mark.asyncio
async def test_set_active_checkpoints(self, metrics):
"""Test setting active checkpoints gauge."""
await metrics.set_active_checkpoints(3)
summary = await metrics.get_summary()
assert summary["active_checkpoints"] == 3
@pytest.mark.asyncio
async def test_set_emergency_state(self, metrics):
"""Test setting emergency state gauge."""
await metrics.set_emergency_state("project-1", "normal")
await metrics.set_emergency_state("project-2", "paused")
await metrics.set_emergency_state("project-3", "stopped")
await metrics.set_emergency_state("project-4", "unknown")
all_metrics = await metrics.get_all_metrics()
state_metrics = [m for m in all_metrics if m.name == "safety_emergency_state"]
assert len(state_metrics) == 4
# Check state values
values_by_scope = {m.labels["scope"]: m.value for m in state_metrics}
assert values_by_scope["project-1"] == 0.0 # normal
assert values_by_scope["project-2"] == 1.0 # paused
assert values_by_scope["project-3"] == 2.0 # stopped
assert values_by_scope["project-4"] == -1.0 # unknown


class TestSafetyMetricsHistograms:
"""Tests for SafetyMetrics histogram methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance."""
return SafetyMetrics()
@pytest.mark.asyncio
async def test_observe_validation_latency(self, metrics):
"""Test observing validation latency."""
await metrics.observe_validation_latency(0.05)
await metrics.observe_validation_latency(0.15)
await metrics.observe_validation_latency(0.5)
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 3.0
sum_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_sum"),
None,
)
assert sum_metric is not None
assert abs(sum_metric.value - 0.7) < 0.001
@pytest.mark.asyncio
async def test_observe_approval_latency(self, metrics):
"""Test observing approval latency."""
await metrics.observe_approval_latency(1.5)
await metrics.observe_approval_latency(3.0)
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "approval_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 2.0
@pytest.mark.asyncio
async def test_observe_mcp_execution_latency(self, metrics):
"""Test observing MCP execution latency."""
await metrics.observe_mcp_execution_latency(0.02)
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "mcp_execution_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 1.0
@pytest.mark.asyncio
async def test_histogram_bucket_updates(self, metrics):
"""Test that histogram buckets are updated correctly."""
# Add values to test bucket distribution
await metrics.observe_validation_latency(0.005) # <= 0.01
await metrics.observe_validation_latency(0.03) # <= 0.05
await metrics.observe_validation_latency(0.07) # <= 0.1
await metrics.observe_validation_latency(15.0) # <= inf
prometheus = await metrics.get_prometheus_format()
# Check that bucket counts are in output
assert "validation_latency_seconds_bucket" in prometheus
assert "le=" in prometheus


class TestSafetyMetricsExport:
"""Tests for SafetyMetrics export methods."""
@pytest_asyncio.fixture
async def metrics(self):
"""Create fresh metrics instance with some data."""
m = SafetyMetrics()
# Add some counters
await m.inc_validations("allow")
await m.inc_validations("deny", agent_id="agent-1")
# Add some gauges
await m.set_pending_approvals(3)
await m.set_budget_remaining("proj-1", "daily", 100.0)
# Add some histogram values
await m.observe_validation_latency(0.1)
return m
@pytest.mark.asyncio
async def test_get_all_metrics(self, metrics):
"""Test getting all metrics."""
all_metrics = await metrics.get_all_metrics()
assert len(all_metrics) > 0
assert all(isinstance(m, MetricValue) for m in all_metrics)
# Check we have different types
types = {m.metric_type for m in all_metrics}
assert MetricType.COUNTER in types
assert MetricType.GAUGE in types
@pytest.mark.asyncio
async def test_get_prometheus_format(self, metrics):
"""Test Prometheus format export."""
output = await metrics.get_prometheus_format()
assert isinstance(output, str)
assert "# TYPE" in output
assert "counter" in output
assert "gauge" in output
assert "safety_validations_total" in output
assert "safety_pending_approvals" in output
@pytest.mark.asyncio
async def test_prometheus_format_with_labels(self, metrics):
"""Test Prometheus format includes labels correctly."""
output = await metrics.get_prometheus_format()
# Counter with labels
assert "decision=allow" in output or "decision=deny" in output
@pytest.mark.asyncio
async def test_prometheus_format_histogram_buckets(self, metrics):
"""Test Prometheus format includes histogram buckets."""
output = await metrics.get_prometheus_format()
assert "histogram" in output
assert "_bucket" in output
assert "le=" in output
assert "+Inf" in output
@pytest.mark.asyncio
async def test_get_summary(self, metrics):
"""Test getting summary."""
summary = await metrics.get_summary()
assert "total_validations" in summary
assert "denied_validations" in summary
assert "approval_requests" in summary
assert "pending_approvals" in summary
assert "active_checkpoints" in summary
assert summary["total_validations"] == 2
assert summary["denied_validations"] == 1
assert summary["pending_approvals"] == 3
@pytest.mark.asyncio
async def test_summary_empty_counters(self):
"""Test summary with no data."""
metrics = SafetyMetrics()
summary = await metrics.get_summary()
assert summary["total_validations"] == 0
assert summary["denied_validations"] == 0
assert summary["pending_approvals"] == 0
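

# The export assertions above (TYPE lines, label pairs, cumulative
# buckets, the +Inf bound) imply exposition output shaped roughly like
# this (exact spacing, quoting, and values are assumptions, not the
# collector's verbatim output):
#
#   # TYPE safety_validations_total counter
#   safety_validations_total{decision=allow} 1
#   safety_validations_total{agent_id=agent-1,decision=deny} 1
#   # TYPE safety_pending_approvals gauge
#   safety_pending_approvals 3
#   # TYPE validation_latency_seconds histogram
#   validation_latency_seconds_bucket{le=0.1} 1
#   validation_latency_seconds_bucket{le=+Inf} 1
#   validation_latency_seconds_sum 0.1
#   validation_latency_seconds_count 1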


class TestSafetyMetricsReset:
"""Tests for SafetyMetrics reset."""
@pytest.mark.asyncio
async def test_reset_clears_counters(self):
"""Test reset clears all counters."""
metrics = SafetyMetrics()
await metrics.inc_validations("allow")
await metrics.inc_approvals_granted()
await metrics.set_pending_approvals(5)
await metrics.observe_validation_latency(0.1)
await metrics.reset()
summary = await metrics.get_summary()
assert summary["total_validations"] == 0
assert summary["approvals_granted"] == 0
assert summary["pending_approvals"] == 0
@pytest.mark.asyncio
async def test_reset_reinitializes_histogram_buckets(self):
"""Test reset reinitializes histogram buckets."""
metrics = SafetyMetrics()
await metrics.observe_validation_latency(0.1)
await metrics.reset()
# After reset, histogram buckets should be reinitialized
prometheus = await metrics.get_prometheus_format()
assert "validation_latency_seconds" in prometheus


class TestParseLabels:
"""Tests for _parse_labels helper method."""
def test_parse_empty_labels(self):
"""Test parsing empty labels string."""
metrics = SafetyMetrics()
result = metrics._parse_labels("")
assert result == {}
def test_parse_single_label(self):
"""Test parsing single label."""
metrics = SafetyMetrics()
result = metrics._parse_labels("key=value")
assert result == {"key": "value"}
def test_parse_multiple_labels(self):
"""Test parsing multiple labels."""
metrics = SafetyMetrics()
result = metrics._parse_labels("a=1,b=2,c=3")
assert result == {"a": "1", "b": "2", "c": "3"}
def test_parse_labels_with_spaces(self):
"""Test parsing labels with spaces."""
metrics = SafetyMetrics()
result = metrics._parse_labels(" key = value , foo = bar ")
assert result == {"key": "value", "foo": "bar"}
def test_parse_labels_with_equals_in_value(self):
"""Test parsing labels with = in value."""
metrics = SafetyMetrics()
result = metrics._parse_labels("query=a=b")
assert result == {"query": "a=b"}
def test_parse_invalid_label(self):
"""Test parsing invalid label without equals."""
metrics = SafetyMetrics()
result = metrics._parse_labels("no_equals")
assert result == {}
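

# A _parse_labels implementation consistent with all six cases above
# (a sketch; the real helper may differ in details):
def _parse_labels_sketch(label_str: str) -> dict[str, str]:
    """Parse "a=1,b=2" into a dict, splitting each pair on the first '='."""
    labels: dict[str, str] = {}
    for part in label_str.split(","):
        key, sep, value = part.partition("=")
        if sep:  # tokens without '=' are ignored
            labels[key.strip()] = value.strip()
    return labels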


class TestHistogramBucketInit:
"""Tests for histogram bucket initialization."""
def test_histogram_buckets_initialized(self):
"""Test that histogram buckets are initialized."""
metrics = SafetyMetrics()
assert "validation_latency_seconds" in metrics._histogram_buckets
assert "approval_latency_seconds" in metrics._histogram_buckets
assert "mcp_execution_latency_seconds" in metrics._histogram_buckets
def test_histogram_buckets_have_correct_values(self):
"""Test histogram buckets have correct boundary values."""
metrics = SafetyMetrics()
buckets = metrics._histogram_buckets["validation_latency_seconds"]
# Check first few and last bucket
assert buckets[0].le == 0.01
assert buckets[1].le == 0.05
assert buckets[-1].le == float("inf")
# Check all have zero initial count
assert all(b.count == 0 for b in buckets)


class TestSingletonAndConvenience:
"""Tests for singleton pattern and convenience functions."""
@pytest.mark.asyncio
async def test_get_safety_metrics_returns_same_instance(self):
"""Test get_safety_metrics returns singleton."""
# Reset the module-level singleton for this test
import app.services.safety.metrics.collector as collector_module
collector_module._metrics = None
m1 = await get_safety_metrics()
m2 = await get_safety_metrics()
assert m1 is m2
@pytest.mark.asyncio
async def test_record_validation_convenience(self):
"""Test record_validation convenience function."""
import app.services.safety.metrics.collector as collector_module
collector_module._metrics = None # Reset
await record_validation("allow")
await record_validation("deny", agent_id="test-agent")
metrics = await get_safety_metrics()
summary = await metrics.get_summary()
assert summary["total_validations"] == 2
assert summary["denied_validations"] == 1
@pytest.mark.asyncio
async def test_record_mcp_call_convenience(self):
"""Test record_mcp_call convenience function."""
import app.services.safety.metrics.collector as collector_module
collector_module._metrics = None # Reset
await record_mcp_call("search_knowledge", success=True, latency_ms=50)
await record_mcp_call("run_code", success=False, latency_ms=100)
metrics = await get_safety_metrics()
summary = await metrics.get_summary()
assert summary["mcp_calls"] == 2
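

# The reset-then-compare pattern above implies a lazily created
# module-level singleton, sketched here with distinct names to avoid
# shadowing the imported get_safety_metrics (the real module may also
# guard first creation with a lock):
_singleton_sketch = None  # illustrative module-level cache

async def _get_metrics_sketch() -> SafetyMetrics:
    global _singleton_sketch
    if _singleton_sketch is None:
        _singleton_sketch = SafetyMetrics()
    return _singleton_sketch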


class TestConcurrency:
"""Tests for concurrent metric updates."""
@pytest.mark.asyncio
async def test_concurrent_counter_increments(self):
"""Test concurrent counter increments are safe."""
import asyncio
metrics = SafetyMetrics()
async def increment_many():
for _ in range(100):
await metrics.inc_validations("allow")
# Run 10 concurrent tasks each incrementing 100 times
await asyncio.gather(*[increment_many() for _ in range(10)])
summary = await metrics.get_summary()
assert summary["total_validations"] == 1000
@pytest.mark.asyncio
async def test_concurrent_gauge_updates(self):
"""Test concurrent gauge updates are safe."""
import asyncio
metrics = SafetyMetrics()
async def update_gauge(value):
await metrics.set_pending_approvals(value)
# Run concurrent gauge updates
await asyncio.gather(*[update_gauge(i) for i in range(100)])
        # Whichever update runs last wins; assert the final value is one of the submitted values (0-99)
summary = await metrics.get_summary()
assert 0 <= summary["pending_approvals"] < 100
@pytest.mark.asyncio
async def test_concurrent_histogram_observations(self):
"""Test concurrent histogram observations are safe."""
import asyncio
metrics = SafetyMetrics()
async def observe_many():
for i in range(100):
await metrics.observe_validation_latency(i / 1000)
await asyncio.gather(*[observe_many() for _ in range(10)])
all_metrics = await metrics.get_all_metrics()
count_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_count"),
None,
)
assert count_metric is not None
assert count_metric.value == 1000.0
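

# These totals come out exact because updates never interleave
# mid-increment, whether the collector relies on asyncio's
# single-threaded scheduling or on an explicit lock. A minimal sketch
# of the locked variant (illustrative, not the collector's code):
import asyncio

class _LockedCounterSketch:
    """Serialize increments behind an asyncio.Lock."""

    def __init__(self) -> None:
        self._value = 0.0
        self._lock = asyncio.Lock()

    async def inc(self) -> None:
        async with self._lock:
            self._value += 1.0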


class TestEdgeCases:
"""Tests for edge cases."""
@pytest.mark.asyncio
async def test_very_large_counter_value(self):
"""Test handling very large counter values."""
metrics = SafetyMetrics()
for _ in range(10000):
await metrics.inc_validations("allow")
summary = await metrics.get_summary()
assert summary["total_validations"] == 10000
@pytest.mark.asyncio
async def test_zero_and_negative_gauge_values(self):
"""Test zero and negative gauge values."""
metrics = SafetyMetrics()
await metrics.set_budget_remaining("project", "cost", 0.0)
await metrics.set_budget_remaining("project2", "cost", -10.0)
all_metrics = await metrics.get_all_metrics()
gauges = [m for m in all_metrics if m.name == "safety_budget_remaining"]
values = {m.labels.get("scope"): m.value for m in gauges}
assert values["project"] == 0.0
assert values["project2"] == -10.0
@pytest.mark.asyncio
async def test_very_small_histogram_values(self):
"""Test very small histogram values."""
metrics = SafetyMetrics()
await metrics.observe_validation_latency(0.0001) # 0.1ms
all_metrics = await metrics.get_all_metrics()
sum_metric = next(
(m for m in all_metrics if m.name == "validation_latency_seconds_sum"),
None,
)
assert sum_metric is not None
assert abs(sum_metric.value - 0.0001) < 0.00001
@pytest.mark.asyncio
async def test_special_characters_in_labels(self):
"""Test special characters in label values."""
metrics = SafetyMetrics()
await metrics.inc_validations("allow", agent_id="agent/with/slashes")
all_metrics = await metrics.get_all_metrics()
counters = [m for m in all_metrics if m.name == "safety_validations_total"]
# Should have the metric with special chars
assert len(counters) > 0
@pytest.mark.asyncio
async def test_empty_histogram_export(self):
"""Test exporting histogram with no observations."""
metrics = SafetyMetrics()
# No observations, but histogram buckets should still exist
prometheus = await metrics.get_prometheus_format()
assert "validation_latency_seconds" in prometheus
assert "le=" in prometheus
@pytest.mark.asyncio
async def test_prometheus_format_empty_label_value(self):
"""Test Prometheus format with empty label metrics."""
metrics = SafetyMetrics()
await metrics.inc_approvals_granted() # Uses empty string as label
prometheus = await metrics.get_prometheus_format()
assert "safety_approvals_granted_total" in prometheus
@pytest.mark.asyncio
async def test_multiple_resets(self):
"""Test multiple resets don't cause issues."""
metrics = SafetyMetrics()
await metrics.inc_validations("allow")
await metrics.reset()
await metrics.reset()
await metrics.reset()
summary = await metrics.get_summary()
assert summary["total_validations"] == 0