Add tests to improve backend coverage from 85% to 93%:

- test_audit.py: 60 tests for AuditLogger (20% -> 99%)
  - Hash chain integrity, sanitization, retention, handlers
  - Fixed bug: hash chain modification after event creation
  - Fixed bug: verification not using correct prev_hash
- test_hitl.py: Tests for HITL manager (0% -> 100%)
- test_permissions.py: Tests for permissions manager (0% -> 99%)
- test_rollback.py: Tests for rollback manager (0% -> 100%)
- test_metrics.py: Tests for metrics collector (0% -> 100%)
- test_mcp_integration.py: Tests for MCP safety wrapper (0% -> 100%)
- test_validation.py: Additional cache and edge case tests (76% -> 100%)
- test_scoring.py: Lock cleanup and edge case tests (78% -> 91%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
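The two audit fixes both concern hash chaining: each audit event stores a hash derived from the previous event's hash, so tampering with any earlier event breaks verification of everything after it. As a rough illustration only (the names below are hypothetical and are not the AuditLogger API), the idea looks like this:

```python
import hashlib
import json
from dataclasses import dataclass


@dataclass(frozen=True)  # frozen: an event cannot be modified after creation
class ChainedEvent:
    payload: dict
    prev_hash: str
    hash: str


def _event_hash(payload: dict, prev_hash: str) -> str:
    data = json.dumps(payload, sort_keys=True) + prev_hash
    return hashlib.sha256(data.encode()).hexdigest()


def append_event(chain: list[ChainedEvent], payload: dict) -> None:
    prev_hash = chain[-1].hash if chain else ""
    chain.append(ChainedEvent(payload, prev_hash, _event_hash(payload, prev_hash)))


def verify_chain(chain: list[ChainedEvent]) -> bool:
    prev_hash = ""
    for event in chain:
        # Each link must reference the hash of the preceding event,
        # and its own hash must recompute from payload + prev_hash.
        if event.prev_hash != prev_hash:
            return False
        if event.hash != _event_hash(event.payload, event.prev_hash):
            return False
        prev_hash = event.hash
    return True
```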
748 lines
25 KiB
Python
"""
|
|
Tests for Safety Metrics Collector.
|
|
|
|
Tests cover:
|
|
- MetricType, MetricValue, HistogramBucket data structures
|
|
- SafetyMetrics counters, gauges, histograms
|
|
- Prometheus format export
|
|
- Summary and reset operations
|
|
- Singleton pattern and convenience functions
|
|
"""
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
|
|
from app.services.safety.metrics.collector import (
|
|
HistogramBucket,
|
|
MetricType,
|
|
MetricValue,
|
|
SafetyMetrics,
|
|
get_safety_metrics,
|
|
record_mcp_call,
|
|
record_validation,
|
|
)


class TestMetricType:
    """Tests for MetricType enum."""

    def test_metric_types_exist(self):
        """Test all metric types are defined."""
        assert MetricType.COUNTER == "counter"
        assert MetricType.GAUGE == "gauge"
        assert MetricType.HISTOGRAM == "histogram"

    def test_metric_type_is_string(self):
        """Test MetricType values are strings."""
        assert isinstance(MetricType.COUNTER.value, str)
        assert isinstance(MetricType.GAUGE.value, str)
        assert isinstance(MetricType.HISTOGRAM.value, str)


class TestMetricValue:
    """Tests for MetricValue dataclass."""

    def test_metric_value_creation(self):
        """Test creating a metric value."""
        mv = MetricValue(
            name="test_metric",
            metric_type=MetricType.COUNTER,
            value=42.0,
            labels={"env": "test"},
        )

        assert mv.name == "test_metric"
        assert mv.metric_type == MetricType.COUNTER
        assert mv.value == 42.0
        assert mv.labels == {"env": "test"}
        assert mv.timestamp is not None

    def test_metric_value_defaults(self):
        """Test metric value default values."""
        mv = MetricValue(
            name="test",
            metric_type=MetricType.GAUGE,
            value=0.0,
        )

        assert mv.labels == {}
        assert mv.timestamp is not None


class TestHistogramBucket:
    """Tests for HistogramBucket dataclass."""

    def test_histogram_bucket_creation(self):
        """Test creating a histogram bucket."""
        bucket = HistogramBucket(le=0.5, count=10)

        assert bucket.le == 0.5
        assert bucket.count == 10

    def test_histogram_bucket_defaults(self):
        """Test histogram bucket default count."""
        bucket = HistogramBucket(le=1.0)

        assert bucket.le == 1.0
        assert bucket.count == 0

    def test_histogram_bucket_infinity(self):
        """Test histogram bucket with infinity."""
        bucket = HistogramBucket(le=float("inf"))

        assert bucket.le == float("inf")


class TestSafetyMetricsCounters:
    """Tests for SafetyMetrics counter methods."""

    @pytest_asyncio.fixture
    async def metrics(self):
        """Create fresh metrics instance."""
        return SafetyMetrics()

    @pytest.mark.asyncio
    async def test_inc_validations(self, metrics):
        """Test incrementing validations counter."""
        await metrics.inc_validations("allow")
        await metrics.inc_validations("allow")
        await metrics.inc_validations("deny", agent_id="agent-1")

        summary = await metrics.get_summary()
        assert summary["total_validations"] == 3
        assert summary["denied_validations"] == 1

    @pytest.mark.asyncio
    async def test_inc_approvals_requested(self, metrics):
        """Test incrementing approval requests counter."""
        await metrics.inc_approvals_requested("normal")
        await metrics.inc_approvals_requested("urgent")
        await metrics.inc_approvals_requested()  # default

        summary = await metrics.get_summary()
        assert summary["approval_requests"] == 3

    @pytest.mark.asyncio
    async def test_inc_approvals_granted(self, metrics):
        """Test incrementing approvals granted counter."""
        await metrics.inc_approvals_granted()
        await metrics.inc_approvals_granted()

        summary = await metrics.get_summary()
        assert summary["approvals_granted"] == 2

    @pytest.mark.asyncio
    async def test_inc_approvals_denied(self, metrics):
        """Test incrementing approvals denied counter."""
        await metrics.inc_approvals_denied("timeout")
        await metrics.inc_approvals_denied("policy")
        await metrics.inc_approvals_denied()  # default manual

        summary = await metrics.get_summary()
        assert summary["approvals_denied"] == 3

    @pytest.mark.asyncio
    async def test_inc_rate_limit_exceeded(self, metrics):
        """Test incrementing rate limit exceeded counter."""
        await metrics.inc_rate_limit_exceeded("requests_per_minute")
        await metrics.inc_rate_limit_exceeded("tokens_per_hour")

        summary = await metrics.get_summary()
        assert summary["rate_limit_hits"] == 2

    @pytest.mark.asyncio
    async def test_inc_budget_exceeded(self, metrics):
        """Test incrementing budget exceeded counter."""
        await metrics.inc_budget_exceeded("daily_cost")
        await metrics.inc_budget_exceeded("monthly_tokens")

        summary = await metrics.get_summary()
        assert summary["budget_exceeded"] == 2

    @pytest.mark.asyncio
    async def test_inc_loops_detected(self, metrics):
        """Test incrementing loops detected counter."""
        await metrics.inc_loops_detected("repetition")
        await metrics.inc_loops_detected("pattern")

        summary = await metrics.get_summary()
        assert summary["loops_detected"] == 2

    @pytest.mark.asyncio
    async def test_inc_emergency_events(self, metrics):
        """Test incrementing emergency events counter."""
        await metrics.inc_emergency_events("pause", "project-1")
        await metrics.inc_emergency_events("stop", "agent-2")

        summary = await metrics.get_summary()
        assert summary["emergency_events"] == 2

    @pytest.mark.asyncio
    async def test_inc_content_filtered(self, metrics):
        """Test incrementing content filtered counter."""
        await metrics.inc_content_filtered("profanity", "blocked")
        await metrics.inc_content_filtered("pii", "redacted")

        summary = await metrics.get_summary()
        assert summary["content_filtered"] == 2

    @pytest.mark.asyncio
    async def test_inc_checkpoints_created(self, metrics):
        """Test incrementing checkpoints created counter."""
        await metrics.inc_checkpoints_created()
        await metrics.inc_checkpoints_created()
        await metrics.inc_checkpoints_created()

        summary = await metrics.get_summary()
        assert summary["checkpoints_created"] == 3

    @pytest.mark.asyncio
    async def test_inc_rollbacks_executed(self, metrics):
        """Test incrementing rollbacks executed counter."""
        await metrics.inc_rollbacks_executed(success=True)
        await metrics.inc_rollbacks_executed(success=False)

        summary = await metrics.get_summary()
        assert summary["rollbacks_executed"] == 2

    @pytest.mark.asyncio
    async def test_inc_mcp_calls(self, metrics):
        """Test incrementing MCP calls counter."""
        await metrics.inc_mcp_calls("search_knowledge", success=True)
        await metrics.inc_mcp_calls("run_code", success=False)

        summary = await metrics.get_summary()
        assert summary["mcp_calls"] == 2


class TestSafetyMetricsGauges:
    """Tests for SafetyMetrics gauge methods."""

    @pytest_asyncio.fixture
    async def metrics(self):
        """Create fresh metrics instance."""
        return SafetyMetrics()

    @pytest.mark.asyncio
    async def test_set_budget_remaining(self, metrics):
        """Test setting budget remaining gauge."""
        await metrics.set_budget_remaining("project-1", "daily_cost", 50.0)

        all_metrics = await metrics.get_all_metrics()
        gauge_metrics = [m for m in all_metrics if m.name == "safety_budget_remaining"]
        assert len(gauge_metrics) == 1
        assert gauge_metrics[0].value == 50.0
        assert gauge_metrics[0].labels["scope"] == "project-1"
        assert gauge_metrics[0].labels["budget_type"] == "daily_cost"

    @pytest.mark.asyncio
    async def test_set_rate_limit_remaining(self, metrics):
        """Test setting rate limit remaining gauge."""
        await metrics.set_rate_limit_remaining("agent-1", "requests_per_minute", 45)

        all_metrics = await metrics.get_all_metrics()
        gauge_metrics = [
            m for m in all_metrics if m.name == "safety_rate_limit_remaining"
        ]
        assert len(gauge_metrics) == 1
        assert gauge_metrics[0].value == 45.0

    @pytest.mark.asyncio
    async def test_set_pending_approvals(self, metrics):
        """Test setting pending approvals gauge."""
        await metrics.set_pending_approvals(5)

        summary = await metrics.get_summary()
        assert summary["pending_approvals"] == 5

    @pytest.mark.asyncio
    async def test_set_active_checkpoints(self, metrics):
        """Test setting active checkpoints gauge."""
        await metrics.set_active_checkpoints(3)

        summary = await metrics.get_summary()
        assert summary["active_checkpoints"] == 3

    @pytest.mark.asyncio
    async def test_set_emergency_state(self, metrics):
        """Test setting emergency state gauge."""
        await metrics.set_emergency_state("project-1", "normal")
        await metrics.set_emergency_state("project-2", "paused")
        await metrics.set_emergency_state("project-3", "stopped")
        await metrics.set_emergency_state("project-4", "unknown")

        all_metrics = await metrics.get_all_metrics()
        state_metrics = [m for m in all_metrics if m.name == "safety_emergency_state"]
        assert len(state_metrics) == 4

        # Check state values
        values_by_scope = {m.labels["scope"]: m.value for m in state_metrics}
        assert values_by_scope["project-1"] == 0.0  # normal
        assert values_by_scope["project-2"] == 1.0  # paused
        assert values_by_scope["project-3"] == 2.0  # stopped
        assert values_by_scope["project-4"] == -1.0  # unknown


class TestSafetyMetricsHistograms:
    """Tests for SafetyMetrics histogram methods."""

    @pytest_asyncio.fixture
    async def metrics(self):
        """Create fresh metrics instance."""
        return SafetyMetrics()

    @pytest.mark.asyncio
    async def test_observe_validation_latency(self, metrics):
        """Test observing validation latency."""
        await metrics.observe_validation_latency(0.05)
        await metrics.observe_validation_latency(0.15)
        await metrics.observe_validation_latency(0.5)

        all_metrics = await metrics.get_all_metrics()

        count_metric = next(
            (m for m in all_metrics if m.name == "validation_latency_seconds_count"),
            None,
        )
        assert count_metric is not None
        assert count_metric.value == 3.0

        sum_metric = next(
            (m for m in all_metrics if m.name == "validation_latency_seconds_sum"),
            None,
        )
        assert sum_metric is not None
        assert abs(sum_metric.value - 0.7) < 0.001

    @pytest.mark.asyncio
    async def test_observe_approval_latency(self, metrics):
        """Test observing approval latency."""
        await metrics.observe_approval_latency(1.5)
        await metrics.observe_approval_latency(3.0)

        all_metrics = await metrics.get_all_metrics()

        count_metric = next(
            (m for m in all_metrics if m.name == "approval_latency_seconds_count"),
            None,
        )
        assert count_metric is not None
        assert count_metric.value == 2.0

    @pytest.mark.asyncio
    async def test_observe_mcp_execution_latency(self, metrics):
        """Test observing MCP execution latency."""
        await metrics.observe_mcp_execution_latency(0.02)

        all_metrics = await metrics.get_all_metrics()

        count_metric = next(
            (m for m in all_metrics if m.name == "mcp_execution_latency_seconds_count"),
            None,
        )
        assert count_metric is not None
        assert count_metric.value == 1.0

    @pytest.mark.asyncio
    async def test_histogram_bucket_updates(self, metrics):
        """Test that histogram buckets are updated correctly."""
        # Add values to test bucket distribution
        await metrics.observe_validation_latency(0.005)  # <= 0.01
        await metrics.observe_validation_latency(0.03)  # <= 0.05
        await metrics.observe_validation_latency(0.07)  # <= 0.1
        await metrics.observe_validation_latency(15.0)  # <= inf

        prometheus = await metrics.get_prometheus_format()

        # Check that bucket counts are in output
        assert "validation_latency_seconds_bucket" in prometheus
        assert "le=" in prometheus


class TestSafetyMetricsExport:
    """Tests for SafetyMetrics export methods."""

    @pytest_asyncio.fixture
    async def metrics(self):
        """Create fresh metrics instance with some data."""
        m = SafetyMetrics()

        # Add some counters
        await m.inc_validations("allow")
        await m.inc_validations("deny", agent_id="agent-1")

        # Add some gauges
        await m.set_pending_approvals(3)
        await m.set_budget_remaining("proj-1", "daily", 100.0)

        # Add some histogram values
        await m.observe_validation_latency(0.1)

        return m

    @pytest.mark.asyncio
    async def test_get_all_metrics(self, metrics):
        """Test getting all metrics."""
        all_metrics = await metrics.get_all_metrics()

        assert len(all_metrics) > 0
        assert all(isinstance(m, MetricValue) for m in all_metrics)

        # Check we have different types
        types = {m.metric_type for m in all_metrics}
        assert MetricType.COUNTER in types
        assert MetricType.GAUGE in types

    @pytest.mark.asyncio
    async def test_get_prometheus_format(self, metrics):
        """Test Prometheus format export."""
        output = await metrics.get_prometheus_format()

        assert isinstance(output, str)
        assert "# TYPE" in output
        assert "counter" in output
        assert "gauge" in output
        assert "safety_validations_total" in output
        assert "safety_pending_approvals" in output

    @pytest.mark.asyncio
    async def test_prometheus_format_with_labels(self, metrics):
        """Test Prometheus format includes labels correctly."""
        output = await metrics.get_prometheus_format()

        # Counter with labels
        assert "decision=allow" in output or "decision=deny" in output

    @pytest.mark.asyncio
    async def test_prometheus_format_histogram_buckets(self, metrics):
        """Test Prometheus format includes histogram buckets."""
        output = await metrics.get_prometheus_format()

        assert "histogram" in output
        assert "_bucket" in output
        assert "le=" in output
        assert "+Inf" in output

    @pytest.mark.asyncio
    async def test_get_summary(self, metrics):
        """Test getting summary."""
        summary = await metrics.get_summary()

        assert "total_validations" in summary
        assert "denied_validations" in summary
        assert "approval_requests" in summary
        assert "pending_approvals" in summary
        assert "active_checkpoints" in summary

        assert summary["total_validations"] == 2
        assert summary["denied_validations"] == 1
        assert summary["pending_approvals"] == 3

    @pytest.mark.asyncio
    async def test_summary_empty_counters(self):
        """Test summary with no data."""
        metrics = SafetyMetrics()
        summary = await metrics.get_summary()

        assert summary["total_validations"] == 0
        assert summary["denied_validations"] == 0
        assert summary["pending_approvals"] == 0


class TestSafetyMetricsReset:
    """Tests for SafetyMetrics reset."""

    @pytest.mark.asyncio
    async def test_reset_clears_counters(self):
        """Test reset clears all counters."""
        metrics = SafetyMetrics()

        await metrics.inc_validations("allow")
        await metrics.inc_approvals_granted()
        await metrics.set_pending_approvals(5)
        await metrics.observe_validation_latency(0.1)

        await metrics.reset()

        summary = await metrics.get_summary()
        assert summary["total_validations"] == 0
        assert summary["approvals_granted"] == 0
        assert summary["pending_approvals"] == 0

    @pytest.mark.asyncio
    async def test_reset_reinitializes_histogram_buckets(self):
        """Test reset reinitializes histogram buckets."""
        metrics = SafetyMetrics()

        await metrics.observe_validation_latency(0.1)
        await metrics.reset()

        # After reset, histogram buckets should be reinitialized
        prometheus = await metrics.get_prometheus_format()
        assert "validation_latency_seconds" in prometheus


class TestParseLabels:
    """Tests for _parse_labels helper method."""

    def test_parse_empty_labels(self):
        """Test parsing empty labels string."""
        metrics = SafetyMetrics()
        result = metrics._parse_labels("")
        assert result == {}

    def test_parse_single_label(self):
        """Test parsing single label."""
        metrics = SafetyMetrics()
        result = metrics._parse_labels("key=value")
        assert result == {"key": "value"}

    def test_parse_multiple_labels(self):
        """Test parsing multiple labels."""
        metrics = SafetyMetrics()
        result = metrics._parse_labels("a=1,b=2,c=3")
        assert result == {"a": "1", "b": "2", "c": "3"}

    def test_parse_labels_with_spaces(self):
        """Test parsing labels with spaces."""
        metrics = SafetyMetrics()
        result = metrics._parse_labels(" key = value , foo = bar ")
        assert result == {"key": "value", "foo": "bar"}

    def test_parse_labels_with_equals_in_value(self):
        """Test parsing labels with = in value."""
        metrics = SafetyMetrics()
        result = metrics._parse_labels("query=a=b")
        assert result == {"query": "a=b"}

    def test_parse_invalid_label(self):
        """Test parsing invalid label without equals."""
        metrics = SafetyMetrics()
        result = metrics._parse_labels("no_equals")
        assert result == {}


class TestHistogramBucketInit:
    """Tests for histogram bucket initialization."""

    def test_histogram_buckets_initialized(self):
        """Test that histogram buckets are initialized."""
        metrics = SafetyMetrics()

        assert "validation_latency_seconds" in metrics._histogram_buckets
        assert "approval_latency_seconds" in metrics._histogram_buckets
        assert "mcp_execution_latency_seconds" in metrics._histogram_buckets

    def test_histogram_buckets_have_correct_values(self):
        """Test histogram buckets have correct boundary values."""
        metrics = SafetyMetrics()

        buckets = metrics._histogram_buckets["validation_latency_seconds"]

        # Check first few and last bucket
        assert buckets[0].le == 0.01
        assert buckets[1].le == 0.05
        assert buckets[-1].le == float("inf")

        # Check all have zero initial count
        assert all(b.count == 0 for b in buckets)


class TestSingletonAndConvenience:
    """Tests for singleton pattern and convenience functions."""

    @pytest.mark.asyncio
    async def test_get_safety_metrics_returns_same_instance(self):
        """Test get_safety_metrics returns singleton."""
        # Reset the module-level singleton for this test
        import app.services.safety.metrics.collector as collector_module

        collector_module._metrics = None

        m1 = await get_safety_metrics()
        m2 = await get_safety_metrics()

        assert m1 is m2

    @pytest.mark.asyncio
    async def test_record_validation_convenience(self):
        """Test record_validation convenience function."""
        import app.services.safety.metrics.collector as collector_module

        collector_module._metrics = None  # Reset

        await record_validation("allow")
        await record_validation("deny", agent_id="test-agent")

        metrics = await get_safety_metrics()
        summary = await metrics.get_summary()

        assert summary["total_validations"] == 2
        assert summary["denied_validations"] == 1

    @pytest.mark.asyncio
    async def test_record_mcp_call_convenience(self):
        """Test record_mcp_call convenience function."""
        import app.services.safety.metrics.collector as collector_module

        collector_module._metrics = None  # Reset

        await record_mcp_call("search_knowledge", success=True, latency_ms=50)
        await record_mcp_call("run_code", success=False, latency_ms=100)

        metrics = await get_safety_metrics()
        summary = await metrics.get_summary()

        assert summary["mcp_calls"] == 2


class TestConcurrency:
    """Tests for concurrent metric updates."""

    @pytest.mark.asyncio
    async def test_concurrent_counter_increments(self):
        """Test concurrent counter increments are safe."""
        import asyncio

        metrics = SafetyMetrics()

        async def increment_many():
            for _ in range(100):
                await metrics.inc_validations("allow")

        # Run 10 concurrent tasks each incrementing 100 times
        await asyncio.gather(*[increment_many() for _ in range(10)])

        summary = await metrics.get_summary()
        assert summary["total_validations"] == 1000

    @pytest.mark.asyncio
    async def test_concurrent_gauge_updates(self):
        """Test concurrent gauge updates are safe."""
        import asyncio

        metrics = SafetyMetrics()

        async def update_gauge(value):
            await metrics.set_pending_approvals(value)

        # Run concurrent gauge updates
        await asyncio.gather(*[update_gauge(i) for i in range(100)])

        # Final value should be one of the updates (last one wins)
        summary = await metrics.get_summary()
        assert 0 <= summary["pending_approvals"] < 100

    @pytest.mark.asyncio
    async def test_concurrent_histogram_observations(self):
        """Test concurrent histogram observations are safe."""
        import asyncio

        metrics = SafetyMetrics()

        async def observe_many():
            for i in range(100):
                await metrics.observe_validation_latency(i / 1000)

        await asyncio.gather(*[observe_many() for _ in range(10)])

        all_metrics = await metrics.get_all_metrics()
        count_metric = next(
            (m for m in all_metrics if m.name == "validation_latency_seconds_count"),
            None,
        )
        assert count_metric is not None
        assert count_metric.value == 1000.0


class TestEdgeCases:
    """Tests for edge cases."""

    @pytest.mark.asyncio
    async def test_very_large_counter_value(self):
        """Test handling very large counter values."""
        metrics = SafetyMetrics()

        for _ in range(10000):
            await metrics.inc_validations("allow")

        summary = await metrics.get_summary()
        assert summary["total_validations"] == 10000

    @pytest.mark.asyncio
    async def test_zero_and_negative_gauge_values(self):
        """Test zero and negative gauge values."""
        metrics = SafetyMetrics()

        await metrics.set_budget_remaining("project", "cost", 0.0)
        await metrics.set_budget_remaining("project2", "cost", -10.0)

        all_metrics = await metrics.get_all_metrics()
        gauges = [m for m in all_metrics if m.name == "safety_budget_remaining"]

        values = {m.labels.get("scope"): m.value for m in gauges}
        assert values["project"] == 0.0
        assert values["project2"] == -10.0

    @pytest.mark.asyncio
    async def test_very_small_histogram_values(self):
        """Test very small histogram values."""
        metrics = SafetyMetrics()

        await metrics.observe_validation_latency(0.0001)  # 0.1ms

        all_metrics = await metrics.get_all_metrics()
        sum_metric = next(
            (m for m in all_metrics if m.name == "validation_latency_seconds_sum"),
            None,
        )
        assert sum_metric is not None
        assert abs(sum_metric.value - 0.0001) < 0.00001

    @pytest.mark.asyncio
    async def test_special_characters_in_labels(self):
        """Test special characters in label values."""
        metrics = SafetyMetrics()

        await metrics.inc_validations("allow", agent_id="agent/with/slashes")

        all_metrics = await metrics.get_all_metrics()
        counters = [m for m in all_metrics if m.name == "safety_validations_total"]

        # Should have the metric with special chars
        assert len(counters) > 0

    @pytest.mark.asyncio
    async def test_empty_histogram_export(self):
        """Test exporting histogram with no observations."""
        metrics = SafetyMetrics()

        # No observations, but histogram buckets should still exist
        prometheus = await metrics.get_prometheus_format()

        assert "validation_latency_seconds" in prometheus
        assert "le=" in prometheus

    @pytest.mark.asyncio
    async def test_prometheus_format_empty_label_value(self):
        """Test Prometheus format with empty label metrics."""
        metrics = SafetyMetrics()

        await metrics.inc_approvals_granted()  # Uses empty string as label

        prometheus = await metrics.get_prometheus_format()
        assert "safety_approvals_granted_total" in prometheus

    @pytest.mark.asyncio
    async def test_multiple_resets(self):
        """Test multiple resets don't cause issues."""
        metrics = SafetyMetrics()

        await metrics.inc_validations("allow")
        await metrics.reset()
        await metrics.reset()
        await metrics.reset()

        summary = await metrics.get_summary()
        assert summary["total_validations"] == 0