forked from cardosofelipe/fast-next-template
Add tests to improve backend coverage from 85% to 93%: - test_audit.py: 60 tests for AuditLogger (20% -> 99%) - Hash chain integrity, sanitization, retention, handlers - Fixed bug: hash chain modification after event creation - Fixed bug: verification not using correct prev_hash - test_hitl.py: Tests for HITL manager (0% -> 100%) - test_permissions.py: Tests for permissions manager (0% -> 99%) - test_rollback.py: Tests for rollback manager (0% -> 100%) - test_metrics.py: Tests for metrics collector (0% -> 100%) - test_mcp_integration.py: Tests for MCP safety wrapper (0% -> 100%) - test_validation.py: Additional cache and edge case tests (76% -> 100%) - test_scoring.py: Lock cleanup and edge case tests (78% -> 91%) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1137 lines
35 KiB
Python
1137 lines
35 KiB
Python
"""Tests for HITL (Human-in-the-Loop) Manager.
|
|
|
|
Tests cover:
|
|
- ApprovalQueue: add, get, complete, wait, cancel, cleanup
|
|
- HITLManager: lifecycle, request, wait, approve, deny, cancel, notifications
|
|
- Edge cases: timeouts, concurrent access, notification errors
|
|
"""
|
|
|
|
import asyncio
|
|
from datetime import datetime, timedelta
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
|
|
from app.services.safety.exceptions import (
|
|
ApprovalDeniedError,
|
|
ApprovalRequiredError,
|
|
ApprovalTimeoutError,
|
|
)
|
|
from app.services.safety.hitl.manager import ApprovalQueue, HITLManager
|
|
from app.services.safety.models import (
|
|
ActionMetadata,
|
|
ActionRequest,
|
|
ActionType,
|
|
ApprovalRequest,
|
|
ApprovalResponse,
|
|
ApprovalStatus,
|
|
AutonomyLevel,
|
|
)
|
|
|
|
# ============================================================================
|
|
# Fixtures
|
|
# ============================================================================
|
|
|
|
|
|
@pytest.fixture
|
|
def action_metadata() -> ActionMetadata:
|
|
"""Create standard action metadata for tests."""
|
|
return ActionMetadata(
|
|
agent_id="test-agent",
|
|
project_id="test-project",
|
|
session_id="test-session",
|
|
autonomy_level=AutonomyLevel.MILESTONE,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def action_request(action_metadata: ActionMetadata) -> ActionRequest:
|
|
"""Create a standard action request for tests."""
|
|
return ActionRequest(
|
|
id="action-123",
|
|
action_type=ActionType.FILE_WRITE,
|
|
tool_name="file_write",
|
|
resource="/path/to/file.txt",
|
|
arguments={"content": "test content"},
|
|
metadata=action_metadata,
|
|
is_destructive=True,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def approval_request(action_request: ActionRequest) -> ApprovalRequest:
|
|
"""Create a standard approval request for tests."""
|
|
return ApprovalRequest(
|
|
id="approval-123",
|
|
action=action_request,
|
|
reason="File write requires approval",
|
|
urgency="normal",
|
|
timeout_seconds=30,
|
|
expires_at=datetime.utcnow() + timedelta(seconds=30),
|
|
)
|
|
|
|
|
|
@pytest_asyncio.fixture
|
|
async def approval_queue() -> ApprovalQueue:
|
|
"""Create an ApprovalQueue for testing."""
|
|
return ApprovalQueue()
|
|
|
|
|
|
@pytest_asyncio.fixture
|
|
async def hitl_manager() -> HITLManager:
|
|
"""Create an HITLManager for testing."""
|
|
with patch("app.services.safety.hitl.manager.get_safety_config") as mock_config:
|
|
mock_config.return_value = MagicMock(hitl_default_timeout=30)
|
|
manager = HITLManager(default_timeout=10)
|
|
yield manager
|
|
# Ensure cleanup
|
|
await manager.stop()
|
|
|
|
|
|
# ============================================================================
|
|
# ApprovalQueue Tests
|
|
# ============================================================================
|
|
|
|
|
|
class TestApprovalQueue:
|
|
"""Tests for the ApprovalQueue class."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_add_and_get_pending(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test adding and retrieving a pending request."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
pending = await approval_queue.get_pending(approval_request.id)
|
|
assert pending is not None
|
|
assert pending.id == approval_request.id
|
|
assert pending.reason == "File write requires approval"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_pending_nonexistent(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
) -> None:
|
|
"""Test getting a non-existent pending request returns None."""
|
|
pending = await approval_queue.get_pending("nonexistent-id")
|
|
assert pending is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_complete_success(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test completing an approval request."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
response = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.APPROVED,
|
|
decided_by="admin",
|
|
reason="Looks good",
|
|
)
|
|
|
|
success = await approval_queue.complete(response)
|
|
assert success is True
|
|
|
|
# Should no longer be pending
|
|
pending = await approval_queue.get_pending(approval_request.id)
|
|
assert pending is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_complete_nonexistent(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
) -> None:
|
|
"""Test completing a non-existent request returns False."""
|
|
response = ApprovalResponse(
|
|
request_id="nonexistent-id",
|
|
status=ApprovalStatus.APPROVED,
|
|
)
|
|
|
|
success = await approval_queue.complete(response)
|
|
assert success is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_complete_notifies_waiters(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test that completing a request notifies waiters."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
# Start waiting in background
|
|
wait_task = asyncio.create_task(
|
|
approval_queue.wait_for_response(approval_request.id, timeout_seconds=5.0)
|
|
)
|
|
|
|
# Give the wait task time to start
|
|
await asyncio.sleep(0.01)
|
|
|
|
# Complete the request
|
|
response = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.APPROVED,
|
|
decided_by="admin",
|
|
)
|
|
await approval_queue.complete(response)
|
|
|
|
# Wait should return the response
|
|
result = await wait_task
|
|
assert result is not None
|
|
assert result.status == ApprovalStatus.APPROVED
|
|
assert result.decided_by == "admin"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_response_timeout(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test waiting for a response that times out."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
result = await approval_queue.wait_for_response(
|
|
approval_request.id,
|
|
timeout_seconds=0.05, # 50ms timeout
|
|
)
|
|
|
|
assert result is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_response_already_completed(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test waiting for an already completed response."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
# Complete first
|
|
response = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.DENIED,
|
|
reason="Not allowed",
|
|
)
|
|
await approval_queue.complete(response)
|
|
|
|
# Wait should return the completed response
|
|
result = await approval_queue.wait_for_response(
|
|
approval_request.id,
|
|
timeout_seconds=1.0,
|
|
)
|
|
|
|
assert result is not None
|
|
assert result.status == ApprovalStatus.DENIED
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_nonexistent_request(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
) -> None:
|
|
"""Test waiting for a request that was never added."""
|
|
result = await approval_queue.wait_for_response(
|
|
"nonexistent-id",
|
|
timeout_seconds=0.1,
|
|
)
|
|
assert result is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_list_pending(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test listing all pending requests."""
|
|
# Add multiple requests
|
|
req1 = ApprovalRequest(
|
|
id="req-1",
|
|
action=action_request,
|
|
reason="Reason 1",
|
|
)
|
|
req2 = ApprovalRequest(
|
|
id="req-2",
|
|
action=action_request,
|
|
reason="Reason 2",
|
|
)
|
|
|
|
await approval_queue.add(req1)
|
|
await approval_queue.add(req2)
|
|
|
|
pending = await approval_queue.list_pending()
|
|
assert len(pending) == 2
|
|
ids = {r.id for r in pending}
|
|
assert ids == {"req-1", "req-2"}
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cancel_success(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test cancelling a pending request."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
success = await approval_queue.cancel(approval_request.id)
|
|
assert success is True
|
|
|
|
# Should no longer be pending
|
|
pending = await approval_queue.get_pending(approval_request.id)
|
|
assert pending is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cancel_nonexistent(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
) -> None:
|
|
"""Test cancelling a non-existent request."""
|
|
success = await approval_queue.cancel("nonexistent-id")
|
|
assert success is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cancel_notifies_waiters(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test that cancellation notifies waiters."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
# Start waiting in background
|
|
wait_task = asyncio.create_task(
|
|
approval_queue.wait_for_response(approval_request.id, timeout_seconds=5.0)
|
|
)
|
|
|
|
await asyncio.sleep(0.01)
|
|
|
|
# Cancel the request
|
|
await approval_queue.cancel(approval_request.id)
|
|
|
|
# Wait should return the cancelled response
|
|
result = await wait_task
|
|
assert result is not None
|
|
assert result.status == ApprovalStatus.CANCELLED
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cleanup_expired(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test cleaning up expired requests."""
|
|
# Create an already-expired request
|
|
expired_request = ApprovalRequest(
|
|
id="expired-req",
|
|
action=action_request,
|
|
reason="Expired reason",
|
|
expires_at=datetime.utcnow() - timedelta(seconds=10), # Already expired
|
|
)
|
|
await approval_queue.add(expired_request)
|
|
|
|
# Create a valid request
|
|
valid_request = ApprovalRequest(
|
|
id="valid-req",
|
|
action=action_request,
|
|
reason="Valid reason",
|
|
expires_at=datetime.utcnow() + timedelta(seconds=300),
|
|
)
|
|
await approval_queue.add(valid_request)
|
|
|
|
# Cleanup should remove expired
|
|
count = await approval_queue.cleanup_expired()
|
|
assert count == 1
|
|
|
|
# Expired should be gone
|
|
pending = await approval_queue.get_pending("expired-req")
|
|
assert pending is None
|
|
|
|
# Valid should remain
|
|
pending = await approval_queue.get_pending("valid-req")
|
|
assert pending is not None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cleanup_expired_notifies_waiters(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that cleanup notifies waiters of expired requests."""
|
|
expired_request = ApprovalRequest(
|
|
id="expired-req",
|
|
action=action_request,
|
|
reason="Expired",
|
|
expires_at=datetime.utcnow() - timedelta(seconds=1),
|
|
)
|
|
await approval_queue.add(expired_request)
|
|
|
|
# Start waiting
|
|
wait_task = asyncio.create_task(
|
|
approval_queue.wait_for_response("expired-req", timeout_seconds=5.0)
|
|
)
|
|
await asyncio.sleep(0.01)
|
|
|
|
# Cleanup
|
|
await approval_queue.cleanup_expired()
|
|
|
|
# Wait should return timeout response
|
|
result = await wait_task
|
|
assert result is not None
|
|
assert result.status == ApprovalStatus.TIMEOUT
|
|
|
|
|
|
# ============================================================================
|
|
# HITLManager Tests
|
|
# ============================================================================
|
|
|
|
|
|
class TestHITLManager:
|
|
"""Tests for the HITLManager class."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_start_and_stop(self, hitl_manager: HITLManager) -> None:
|
|
"""Test starting and stopping the manager."""
|
|
await hitl_manager.start()
|
|
assert hitl_manager._running is True
|
|
assert hitl_manager._cleanup_task is not None
|
|
|
|
await hitl_manager.stop()
|
|
assert hitl_manager._running is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_start_idempotent(self, hitl_manager: HITLManager) -> None:
|
|
"""Test that starting twice is safe."""
|
|
await hitl_manager.start()
|
|
task1 = hitl_manager._cleanup_task
|
|
|
|
await hitl_manager.start()
|
|
task2 = hitl_manager._cleanup_task
|
|
|
|
# Should be the same task
|
|
assert task1 is task2
|
|
|
|
await hitl_manager.stop()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_request_approval(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test creating an approval request."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Destructive action",
|
|
timeout_seconds=60,
|
|
urgency="high",
|
|
context={"extra": "info"},
|
|
)
|
|
|
|
assert request.id is not None
|
|
assert request.action.id == action_request.id
|
|
assert request.reason == "Destructive action"
|
|
assert request.urgency == "high"
|
|
assert request.timeout_seconds == 60
|
|
assert request.context == {"extra": "info"}
|
|
assert request.expires_at is not None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_request_approval_default_timeout(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test approval request uses default timeout."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
# Should use the manager's default timeout (10 seconds from fixture)
|
|
assert request.timeout_seconds == 10
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_approval_success(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test waiting for an approved request."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
timeout_seconds=5,
|
|
)
|
|
|
|
# Approve in background
|
|
async def approve_later():
|
|
await asyncio.sleep(0.05)
|
|
await hitl_manager.approve(
|
|
request_id=request.id,
|
|
decided_by="admin",
|
|
reason="Approved",
|
|
)
|
|
|
|
_task = asyncio.create_task(approve_later()) # noqa: RUF006
|
|
|
|
response = await hitl_manager.wait_for_approval(request.id)
|
|
assert response.status == ApprovalStatus.APPROVED
|
|
assert response.decided_by == "admin"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_approval_denied(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test waiting for a denied request raises error."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
# Deny in background
|
|
async def deny_later():
|
|
await asyncio.sleep(0.05)
|
|
await hitl_manager.deny(
|
|
request_id=request.id,
|
|
decided_by="admin",
|
|
reason="Not allowed",
|
|
)
|
|
|
|
_task = asyncio.create_task(deny_later()) # noqa: RUF006
|
|
|
|
with pytest.raises(ApprovalDeniedError) as exc_info:
|
|
await hitl_manager.wait_for_approval(request.id)
|
|
|
|
assert exc_info.value.approval_id == request.id
|
|
assert exc_info.value.denied_by == "admin"
|
|
assert "Not allowed" in str(exc_info.value.denial_reason)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_approval_timeout(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test waiting for approval that times out."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
timeout_seconds=1, # Short timeout
|
|
)
|
|
|
|
with pytest.raises(ApprovalTimeoutError) as exc_info:
|
|
await hitl_manager.wait_for_approval(request.id, timeout_seconds=0.1)
|
|
|
|
assert exc_info.value.approval_id == request.id
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_approval_cancelled(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test waiting for a cancelled request raises error."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
# Cancel in background
|
|
async def cancel_later():
|
|
await asyncio.sleep(0.05)
|
|
await hitl_manager.cancel(request.id)
|
|
|
|
_task = asyncio.create_task(cancel_later()) # noqa: RUF006
|
|
|
|
with pytest.raises(ApprovalDeniedError) as exc_info:
|
|
await hitl_manager.wait_for_approval(request.id)
|
|
|
|
assert "Cancelled" in str(exc_info.value.denial_reason)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_approval_not_found(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test waiting for a non-existent request raises error."""
|
|
with pytest.raises(ApprovalRequiredError) as exc_info:
|
|
await hitl_manager.wait_for_approval("nonexistent-id")
|
|
|
|
assert "nonexistent-id" in str(exc_info.value)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_approve_success(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test approving a request."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
success = await hitl_manager.approve(
|
|
request_id=request.id,
|
|
decided_by="admin",
|
|
reason="Looks good",
|
|
modifications={"timeout": 30},
|
|
)
|
|
|
|
assert success is True
|
|
|
|
# Should no longer be pending
|
|
pending = await hitl_manager.get_request(request.id)
|
|
assert pending is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_approve_nonexistent(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test approving a non-existent request."""
|
|
success = await hitl_manager.approve(
|
|
request_id="nonexistent-id",
|
|
decided_by="admin",
|
|
)
|
|
assert success is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_deny_success(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test denying a request."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
success = await hitl_manager.deny(
|
|
request_id=request.id,
|
|
decided_by="admin",
|
|
reason="Security concern",
|
|
)
|
|
|
|
assert success is True
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_deny_nonexistent(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test denying a non-existent request."""
|
|
success = await hitl_manager.deny(
|
|
request_id="nonexistent-id",
|
|
decided_by="admin",
|
|
)
|
|
assert success is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cancel_success(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test cancelling a request."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
success = await hitl_manager.cancel(request.id)
|
|
assert success is True
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_list_pending(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test listing pending requests."""
|
|
await hitl_manager.request_approval(action=action_request, reason="Test 1")
|
|
await hitl_manager.request_approval(action=action_request, reason="Test 2")
|
|
|
|
pending = await hitl_manager.list_pending()
|
|
assert len(pending) == 2
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_request(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test getting a specific request."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
retrieved = await hitl_manager.get_request(request.id)
|
|
assert retrieved is not None
|
|
assert retrieved.id == request.id
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_get_request_nonexistent(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test getting a non-existent request."""
|
|
retrieved = await hitl_manager.get_request("nonexistent-id")
|
|
assert retrieved is None
|
|
|
|
|
|
# ============================================================================
|
|
# Notification Handler Tests
|
|
# ============================================================================
|
|
|
|
|
|
class TestHITLNotifications:
|
|
"""Tests for notification handler functionality."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_add_notification_handler(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test adding a notification handler."""
|
|
handler = MagicMock()
|
|
hitl_manager.add_notification_handler(handler)
|
|
assert handler in hitl_manager._notification_handlers
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_remove_notification_handler(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test removing a notification handler."""
|
|
handler = MagicMock()
|
|
hitl_manager.add_notification_handler(handler)
|
|
hitl_manager.remove_notification_handler(handler)
|
|
assert handler not in hitl_manager._notification_handlers
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_remove_nonexistent_handler(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test removing a handler that was never added."""
|
|
handler = MagicMock()
|
|
# Should not raise
|
|
hitl_manager.remove_notification_handler(handler)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_sync_handler_called_on_approval_request(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that sync handlers are called on approval request."""
|
|
handler = MagicMock()
|
|
hitl_manager.add_notification_handler(handler)
|
|
|
|
await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
handler.assert_called_once()
|
|
args = handler.call_args[0]
|
|
assert args[0] == "approval_requested"
|
|
assert isinstance(args[1], ApprovalRequest)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_async_handler_called_on_approval_request(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that async handlers are called on approval request."""
|
|
handler = AsyncMock()
|
|
hitl_manager.add_notification_handler(handler)
|
|
|
|
await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
handler.assert_called_once()
|
|
args = handler.call_args[0]
|
|
assert args[0] == "approval_requested"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_handler_called_on_approval_granted(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that handlers are called when approval is granted."""
|
|
handler = MagicMock()
|
|
hitl_manager.add_notification_handler(handler)
|
|
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
# Reset to check only approve call
|
|
handler.reset_mock()
|
|
|
|
await hitl_manager.approve(request.id, decided_by="admin")
|
|
|
|
handler.assert_called_once()
|
|
args = handler.call_args[0]
|
|
assert args[0] == "approval_granted"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_handler_called_on_approval_denied(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that handlers are called when approval is denied."""
|
|
handler = MagicMock()
|
|
hitl_manager.add_notification_handler(handler)
|
|
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
handler.reset_mock()
|
|
|
|
await hitl_manager.deny(request.id, decided_by="admin", reason="No")
|
|
|
|
handler.assert_called_once()
|
|
args = handler.call_args[0]
|
|
assert args[0] == "approval_denied"
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_handler_error_logged_not_raised(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that handler errors are logged but don't crash the manager."""
|
|
|
|
def bad_handler(event_type, data):
|
|
raise ValueError("Handler exploded!")
|
|
|
|
hitl_manager.add_notification_handler(bad_handler)
|
|
|
|
# Should not raise despite the handler error
|
|
with patch("app.services.safety.hitl.manager.logger") as mock_logger:
|
|
await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
# Error should be logged
|
|
mock_logger.error.assert_called()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_async_handler_error_logged(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that async handler errors are logged."""
|
|
|
|
async def bad_async_handler(event_type, data):
|
|
raise RuntimeError("Async handler exploded!")
|
|
|
|
hitl_manager.add_notification_handler(bad_async_handler)
|
|
|
|
with patch("app.services.safety.hitl.manager.logger") as mock_logger:
|
|
await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
mock_logger.error.assert_called()
|
|
|
|
|
|
# ============================================================================
|
|
# Edge Cases and Potential Bug Detection
|
|
# ============================================================================
|
|
|
|
|
|
class TestHITLEdgeCases:
|
|
"""Edge cases that could reveal hidden bugs."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_concurrent_complete_same_request(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test that concurrent completions are handled safely."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
# Try to complete twice concurrently
|
|
response1 = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.APPROVED,
|
|
decided_by="admin1",
|
|
)
|
|
response2 = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.DENIED,
|
|
decided_by="admin2",
|
|
)
|
|
|
|
results = await asyncio.gather(
|
|
approval_queue.complete(response1),
|
|
approval_queue.complete(response2),
|
|
)
|
|
|
|
# Only one should succeed
|
|
assert sum(results) == 1
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_double_cancel(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test that double cancellation is safe."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
success1 = await approval_queue.cancel(approval_request.id)
|
|
success2 = await approval_queue.cancel(approval_request.id)
|
|
|
|
assert success1 is True
|
|
assert success2 is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_complete_after_cancel(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test that completing after cancellation fails."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
await approval_queue.cancel(approval_request.id)
|
|
|
|
response = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.APPROVED,
|
|
)
|
|
|
|
success = await approval_queue.complete(response)
|
|
assert success is False
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_after_complete(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
approval_request: ApprovalRequest,
|
|
) -> None:
|
|
"""Test waiting after request is already completed."""
|
|
await approval_queue.add(approval_request)
|
|
|
|
response = ApprovalResponse(
|
|
request_id=approval_request.id,
|
|
status=ApprovalStatus.APPROVED,
|
|
decided_by="admin",
|
|
)
|
|
await approval_queue.complete(response)
|
|
|
|
# Wait should return immediately with the cached response
|
|
result = await approval_queue.wait_for_response(
|
|
approval_request.id,
|
|
timeout_seconds=1.0,
|
|
)
|
|
|
|
assert result is not None
|
|
assert result.status == ApprovalStatus.APPROVED
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_approval_with_modifications(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test approval with action modifications."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
modifications = {"timeout": 60, "sandbox": True}
|
|
|
|
async def approve_later():
|
|
await asyncio.sleep(0.05)
|
|
await hitl_manager.approve(
|
|
request_id=request.id,
|
|
decided_by="admin",
|
|
modifications=modifications,
|
|
)
|
|
|
|
_task = asyncio.create_task(approve_later()) # noqa: RUF006
|
|
|
|
response = await hitl_manager.wait_for_approval(request.id)
|
|
|
|
assert response.modifications == modifications
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_cleanup_while_waiting(
|
|
self,
|
|
approval_queue: ApprovalQueue,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test cleanup running while someone is waiting."""
|
|
# Create an expired request
|
|
expired_request = ApprovalRequest(
|
|
id="expired-req",
|
|
action=action_request,
|
|
reason="Expired",
|
|
expires_at=datetime.utcnow() - timedelta(seconds=1),
|
|
)
|
|
await approval_queue.add(expired_request)
|
|
|
|
# Start waiting
|
|
wait_task = asyncio.create_task(
|
|
approval_queue.wait_for_response("expired-req", timeout_seconds=5.0)
|
|
)
|
|
await asyncio.sleep(0.01)
|
|
|
|
# Run cleanup
|
|
count = await approval_queue.cleanup_expired()
|
|
assert count == 1
|
|
|
|
# Wait should get timeout response
|
|
result = await wait_task
|
|
assert result is not None
|
|
assert result.status == ApprovalStatus.TIMEOUT
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_very_short_timeout(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test handling of very short timeout values."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
timeout_seconds=1,
|
|
)
|
|
|
|
# Zero timeout should trigger immediately
|
|
with pytest.raises(ApprovalTimeoutError):
|
|
await hitl_manager.wait_for_approval(request.id, timeout_seconds=0.01)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_empty_reason_approval(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test approval with empty reason."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
success = await hitl_manager.approve(
|
|
request_id=request.id,
|
|
decided_by="admin",
|
|
reason=None, # No reason provided
|
|
)
|
|
|
|
assert success is True
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_wait_for_approval_status_timeout_from_queue(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that TIMEOUT status from queue raises ApprovalTimeoutError."""
|
|
request = await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
timeout_seconds=10,
|
|
)
|
|
|
|
# Simulate cleanup marking as timeout
|
|
async def simulate_cleanup():
|
|
await asyncio.sleep(0.05)
|
|
response = ApprovalResponse(
|
|
request_id=request.id,
|
|
status=ApprovalStatus.TIMEOUT,
|
|
reason="Timed out by cleanup",
|
|
)
|
|
await hitl_manager._queue.complete(response)
|
|
|
|
_task = asyncio.create_task(simulate_cleanup()) # noqa: RUF006
|
|
|
|
with pytest.raises(ApprovalTimeoutError):
|
|
await hitl_manager.wait_for_approval(request.id)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_multiple_handlers_all_called(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
action_request: ActionRequest,
|
|
) -> None:
|
|
"""Test that multiple handlers are all called."""
|
|
handler1 = MagicMock()
|
|
handler2 = MagicMock()
|
|
handler3 = AsyncMock()
|
|
|
|
hitl_manager.add_notification_handler(handler1)
|
|
hitl_manager.add_notification_handler(handler2)
|
|
hitl_manager.add_notification_handler(handler3)
|
|
|
|
await hitl_manager.request_approval(
|
|
action=action_request,
|
|
reason="Test",
|
|
)
|
|
|
|
handler1.assert_called_once()
|
|
handler2.assert_called_once()
|
|
handler3.assert_called_once()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_periodic_cleanup_runs(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test that periodic cleanup task runs without errors."""
|
|
await hitl_manager.start()
|
|
|
|
# Let it run briefly
|
|
await asyncio.sleep(0.1)
|
|
|
|
# Should still be running
|
|
assert hitl_manager._running is True
|
|
assert hitl_manager._cleanup_task is not None
|
|
assert not hitl_manager._cleanup_task.done()
|
|
|
|
await hitl_manager.stop()
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_stop_cancels_cleanup_task(
|
|
self,
|
|
hitl_manager: HITLManager,
|
|
) -> None:
|
|
"""Test that stop properly cancels the cleanup task."""
|
|
await hitl_manager.start()
|
|
cleanup_task = hitl_manager._cleanup_task
|
|
|
|
await hitl_manager.stop()
|
|
|
|
# Task should be cancelled
|
|
assert cleanup_task.cancelled() or cleanup_task.done()
|