test(safety): add comprehensive tests for safety framework modules

Add tests to improve backend coverage from 85% to 93%:

- test_audit.py: 60 tests for AuditLogger (20% -> 99%)
  - Hash chain integrity, sanitization, retention, handlers
  - Fixed bug: hash chain modification after event creation
  - Fixed bug: verification not using correct prev_hash (see the sketch after this list)

- test_hitl.py: Tests for HITL manager (0% -> 100%)
- test_permissions.py: Tests for permissions manager (0% -> 99%)
- test_rollback.py: Tests for rollback manager (0% -> 100%)
- test_metrics.py: Tests for metrics collector (0% -> 100%)
- test_mcp_integration.py: Tests for MCP safety wrapper (0% -> 100%)
- test_validation.py: Additional cache and edge case tests (76% -> 100%)
- test_scoring.py: Lock cleanup and edge case tests (78% -> 91%)

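For reference, a minimal sketch of the prev_hash chaining that the two audit
fixes are about (helper names and event fields here are illustrative, not the
actual AuditLogger API):

    import hashlib
    import json

    def event_hash(payload: dict, prev_hash: str) -> str:
        # The hash covers the event payload plus the previous event's hash
        # and is computed once at creation time (first fix: the chain is not
        # modified after an event has been created).
        body = json.dumps(payload, sort_keys=True) + prev_hash
        return hashlib.sha256(body.encode()).hexdigest()

    def verify_chain(events: list[dict]) -> bool:
        prev_hash = ""
        for event in events:
            # Second fix: verification feeds each event the stored hash of
            # its predecessor as prev_hash while walking the chain.
            if event["hash"] != event_hash(event["payload"], prev_hash):
                return False
            prev_hash = event["hash"]
        return True
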
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 19:41:54 +01:00
parent 758052dcff
commit 60ebeaa582
10 changed files with 6025 additions and 9 deletions


@@ -758,3 +758,136 @@ class TestBaseScorer:
        # Boundaries
        assert scorer.normalize_score(0.0) == 0.0
        assert scorer.normalize_score(1.0) == 1.0


class TestCompositeScorerEdgeCases:
    """Tests for CompositeScorer edge cases and lock management."""

    @pytest.mark.asyncio
    async def test_score_with_zero_weights(self) -> None:
        """Test scoring when all weights are zero."""
        scorer = CompositeScorer(
            relevance_weight=0.0,
            recency_weight=0.0,
            priority_weight=0.0,
        )
        context = KnowledgeContext(
            content="Test content",
            source="docs",
            relevance_score=0.8,
        )

        # Should return 0.0 when total weight is 0
        score = await scorer.score(context, "test query")
        assert score == 0.0

    @pytest.mark.asyncio
    async def test_score_batch_sequential(self) -> None:
        """Test batch scoring in sequential mode (parallel=False)."""
        scorer = CompositeScorer()
        contexts = [
            KnowledgeContext(
                content="Content 1",
                source="docs",
                relevance_score=0.8,
            ),
            KnowledgeContext(
                content="Content 2",
                source="docs",
                relevance_score=0.5,
            ),
        ]

        # Use parallel=False to cover the sequential path
        scored = await scorer.score_batch(contexts, "query", parallel=False)

        assert len(scored) == 2
        assert scored[0].relevance_score == 0.8
        assert scored[1].relevance_score == 0.5

    @pytest.mark.asyncio
    async def test_lock_fast_path_reuse(self) -> None:
        """Test that existing locks are reused via fast path."""
        scorer = CompositeScorer()
        context = KnowledgeContext(
            content="Test",
            source="docs",
            relevance_score=0.5,
        )

        # First access creates the lock
        lock1 = await scorer._get_context_lock(context.id)
        # Second access should hit the fast path (lock exists in dict)
        lock2 = await scorer._get_context_lock(context.id)

        assert lock2 is lock1  # Same lock object returned

    @pytest.mark.asyncio
    async def test_lock_cleanup_when_limit_reached(self) -> None:
        """Test that old locks are cleaned up when limit is reached."""
        import time

        # Create scorer with very low max_locks to trigger cleanup
        scorer = CompositeScorer()
        scorer._max_locks = 3
        scorer._lock_ttl = 0.1  # 100ms TTL

        # Create locks for several context IDs
        context_ids = [f"ctx-{i}" for i in range(5)]

        # Get locks for first 3 contexts (fill up to limit)
        for ctx_id in context_ids[:3]:
            await scorer._get_context_lock(ctx_id)

        # Wait for TTL to expire
        time.sleep(0.15)

        # Getting a lock for a new context should trigger cleanup
        await scorer._get_context_lock(context_ids[3])

        # Some old locks should have been cleaned up
        # The exact number depends on cleanup logic
        assert len(scorer._context_locks) <= scorer._max_locks + 1

    @pytest.mark.asyncio
    async def test_lock_cleanup_preserves_held_locks(self) -> None:
        """Test that cleanup doesn't remove locks that are currently held."""
        import time

        scorer = CompositeScorer()
        scorer._max_locks = 2
        scorer._lock_ttl = 0.05  # 50ms TTL

        # Get and hold lock1
        lock1 = await scorer._get_context_lock("ctx-1")
        async with lock1:
            # While holding lock1, add more locks
            await scorer._get_context_lock("ctx-2")
            time.sleep(0.1)  # Let TTL expire
            # Adding another should trigger cleanup
            await scorer._get_context_lock("ctx-3")

            # lock1 should still exist (it's held)
            assert any(lock is lock1 for lock, _ in scorer._context_locks.values())

    @pytest.mark.asyncio
    async def test_concurrent_lock_acquisition_double_check(self) -> None:
        """Test that concurrent lock acquisition uses double-check pattern."""
        import asyncio

        scorer = CompositeScorer()
        context_id = "test-context-id"

        # Simulate concurrent lock acquisition
        async def get_lock():
            return await scorer._get_context_lock(context_id)

        locks = await asyncio.gather(*[get_lock() for _ in range(10)])

        # All should get the same lock (double-check pattern ensures this)
        assert all(lock is locks[0] for lock in locks)
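
For readers of these tests, here is a minimal sketch of the per-context lock
handling they exercise. The attribute and method names (_context_locks,
_max_locks, _lock_ttl, _get_context_lock) mirror what the assertions touch;
the class name and the bodies below are an assumption, not the actual
CompositeScorer implementation:

    import asyncio
    import time

    class ContextLockRegistry:
        """Per-context asyncio locks with a bounded, TTL-based registry."""

        def __init__(self, max_locks: int = 1024, lock_ttl: float = 300.0) -> None:
            # Maps context id -> (lock, last-used timestamp); the held-lock
            # test unpacks values as `for lock, _ in ..._context_locks.values()`.
            self._context_locks: dict[str, tuple[asyncio.Lock, float]] = {}
            self._registry_guard = asyncio.Lock()
            self._max_locks = max_locks
            self._lock_ttl = lock_ttl

        async def _get_context_lock(self, context_id: str) -> asyncio.Lock:
            # Fast path: reuse an existing lock without taking the guard.
            entry = self._context_locks.get(context_id)
            if entry is not None:
                return entry[0]

            async with self._registry_guard:
                # Double-check: another task may have created the lock while
                # we waited on the guard, so concurrent callers converge on a
                # single lock object per context id.
                entry = self._context_locks.get(context_id)
                if entry is not None:
                    return entry[0]

                if len(self._context_locks) >= self._max_locks:
                    self._cleanup_expired_locks()

                lock = asyncio.Lock()
                self._context_locks[context_id] = (lock, time.monotonic())
                return lock

        def _cleanup_expired_locks(self) -> None:
            # Drop locks whose TTL has expired, but never ones currently held.
            now = time.monotonic()
            for context_id in list(self._context_locks):
                lock, last_used = self._context_locks[context_id]
                if now - last_used > self._lock_ttl and not lock.locked():
                    del self._context_locks[context_id]

The fast path keeps repeated scoring of the same context cheap, the
double-check under the guard prevents duplicate locks under concurrency, and
a `lock.locked()` guard during cleanup is the kind of check the
"preserves held locks" test relies on.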