Files
syndarix/backend/app/services/safety/hitl/manager.py
Felipe Cardoso ef659cd72d feat(safety): add Phase C advanced controls
- Add rollback manager with file checkpointing and transaction context
- Add HITL manager with approval queues and notification handlers
- Add content filter with PII, secrets, and injection detection
- Add emergency controls with stop/pause/resume capabilities
- Update SafetyConfig with checkpoint_dir setting

Issue #63

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 11:36:24 +01:00

450 lines
13 KiB
Python

"""
Human-in-the-Loop (HITL) Manager
Manages approval workflows for actions requiring human oversight.
"""
import asyncio
import logging
from collections.abc import Callable
from datetime import datetime, timedelta
from typing import Any
from uuid import uuid4
from ..config import get_safety_config
from ..exceptions import (
ApprovalDeniedError,
ApprovalRequiredError,
ApprovalTimeoutError,
)
from ..models import (
ActionRequest,
ApprovalRequest,
ApprovalResponse,
ApprovalStatus,
)
logger = logging.getLogger(__name__)
class ApprovalQueue:
"""Queue for pending approval requests."""
def __init__(self) -> None:
self._pending: dict[str, ApprovalRequest] = {}
self._completed: dict[str, ApprovalResponse] = {}
self._waiters: dict[str, asyncio.Event] = {}
self._lock = asyncio.Lock()
async def add(self, request: ApprovalRequest) -> None:
"""Add an approval request to the queue."""
async with self._lock:
self._pending[request.id] = request
self._waiters[request.id] = asyncio.Event()
async def get_pending(self, request_id: str) -> ApprovalRequest | None:
"""Get a pending request by ID."""
async with self._lock:
return self._pending.get(request_id)
async def complete(self, response: ApprovalResponse) -> bool:
"""Complete an approval request."""
async with self._lock:
if response.request_id not in self._pending:
return False
del self._pending[response.request_id]
self._completed[response.request_id] = response
# Notify waiters
if response.request_id in self._waiters:
self._waiters[response.request_id].set()
return True
async def wait_for_response(
self,
request_id: str,
timeout_seconds: float,
) -> ApprovalResponse | None:
"""Wait for a response to an approval request."""
async with self._lock:
waiter = self._waiters.get(request_id)
if not waiter:
return self._completed.get(request_id)
try:
await asyncio.wait_for(waiter.wait(), timeout=timeout_seconds)
except TimeoutError:
return None
async with self._lock:
return self._completed.get(request_id)
async def list_pending(self) -> list[ApprovalRequest]:
"""List all pending requests."""
async with self._lock:
return list(self._pending.values())
async def cancel(self, request_id: str) -> bool:
"""Cancel a pending request."""
async with self._lock:
if request_id not in self._pending:
return False
del self._pending[request_id]
# Create cancelled response
response = ApprovalResponse(
request_id=request_id,
status=ApprovalStatus.CANCELLED,
reason="Cancelled",
)
self._completed[request_id] = response
# Notify waiters
if request_id in self._waiters:
self._waiters[request_id].set()
return True
async def cleanup_expired(self) -> int:
"""Clean up expired requests."""
now = datetime.utcnow()
to_timeout: list[str] = []
async with self._lock:
for request_id, request in self._pending.items():
if request.expires_at and request.expires_at < now:
to_timeout.append(request_id)
count = 0
for request_id in to_timeout:
async with self._lock:
if request_id in self._pending:
del self._pending[request_id]
self._completed[request_id] = ApprovalResponse(
request_id=request_id,
status=ApprovalStatus.TIMEOUT,
reason="Request timed out",
)
if request_id in self._waiters:
self._waiters[request_id].set()
count += 1
return count
class HITLManager:
"""
Manages Human-in-the-Loop approval workflows.
Features:
- Approval request queue
- Configurable timeout handling (default deny)
- Approval delegation
- Batch approval for similar actions
- Approval with modifications
- Notification channels
"""
def __init__(
self,
default_timeout: int | None = None,
) -> None:
"""
Initialize the HITLManager.
Args:
default_timeout: Default timeout for approval requests in seconds
"""
config = get_safety_config()
self._default_timeout = default_timeout or config.hitl_default_timeout
self._queue = ApprovalQueue()
self._notification_handlers: list[Callable[..., Any]] = []
self._running = False
self._cleanup_task: asyncio.Task[None] | None = None
async def start(self) -> None:
"""Start the HITL manager background tasks."""
if self._running:
return
self._running = True
self._cleanup_task = asyncio.create_task(self._periodic_cleanup())
logger.info("HITL Manager started")
async def stop(self) -> None:
"""Stop the HITL manager."""
self._running = False
if self._cleanup_task:
self._cleanup_task.cancel()
try:
await self._cleanup_task
except asyncio.CancelledError:
pass
logger.info("HITL Manager stopped")
async def request_approval(
self,
action: ActionRequest,
reason: str,
timeout_seconds: int | None = None,
urgency: str = "normal",
context: dict[str, Any] | None = None,
) -> ApprovalRequest:
"""
Create an approval request for an action.
Args:
action: The action requiring approval
reason: Why approval is required
timeout_seconds: Timeout for this request
urgency: Urgency level (low, normal, high, critical)
context: Additional context for the approver
Returns:
The created approval request
"""
timeout = timeout_seconds or self._default_timeout
expires_at = datetime.utcnow() + timedelta(seconds=timeout)
request = ApprovalRequest(
id=str(uuid4()),
action=action,
reason=reason,
urgency=urgency,
timeout_seconds=timeout,
expires_at=expires_at,
context=context or {},
)
await self._queue.add(request)
# Notify handlers
await self._notify_handlers("approval_requested", request)
logger.info(
"Approval requested: %s for action %s (timeout: %ds)",
request.id,
action.id,
timeout,
)
return request
async def wait_for_approval(
self,
request_id: str,
timeout_seconds: int | None = None,
) -> ApprovalResponse:
"""
Wait for an approval decision.
Args:
request_id: ID of the approval request
timeout_seconds: Override timeout
Returns:
The approval response
Raises:
ApprovalTimeoutError: If timeout expires
ApprovalDeniedError: If approval is denied
"""
request = await self._queue.get_pending(request_id)
if not request:
raise ApprovalRequiredError(
f"Approval request not found: {request_id}",
approval_id=request_id,
)
timeout = timeout_seconds or request.timeout_seconds or self._default_timeout
response = await self._queue.wait_for_response(request_id, timeout)
if response is None:
# Timeout - default deny
response = ApprovalResponse(
request_id=request_id,
status=ApprovalStatus.TIMEOUT,
reason="Request timed out (default deny)",
)
await self._queue.complete(response)
raise ApprovalTimeoutError(
"Approval request timed out",
approval_id=request_id,
timeout_seconds=timeout,
)
if response.status == ApprovalStatus.DENIED:
raise ApprovalDeniedError(
response.reason or "Approval denied",
approval_id=request_id,
denied_by=response.decided_by,
denial_reason=response.reason,
)
if response.status == ApprovalStatus.TIMEOUT:
raise ApprovalTimeoutError(
"Approval request timed out",
approval_id=request_id,
timeout_seconds=timeout,
)
if response.status == ApprovalStatus.CANCELLED:
raise ApprovalDeniedError(
"Approval request was cancelled",
approval_id=request_id,
denial_reason="Cancelled",
)
return response
async def approve(
self,
request_id: str,
decided_by: str,
reason: str | None = None,
modifications: dict[str, Any] | None = None,
) -> bool:
"""
Approve a pending request.
Args:
request_id: ID of the approval request
decided_by: Who approved
reason: Optional approval reason
modifications: Optional modifications to the action
Returns:
True if approval was recorded
"""
response = ApprovalResponse(
request_id=request_id,
status=ApprovalStatus.APPROVED,
decided_by=decided_by,
reason=reason,
modifications=modifications,
)
success = await self._queue.complete(response)
if success:
logger.info(
"Approval granted: %s by %s",
request_id,
decided_by,
)
await self._notify_handlers("approval_granted", response)
return success
async def deny(
self,
request_id: str,
decided_by: str,
reason: str | None = None,
) -> bool:
"""
Deny a pending request.
Args:
request_id: ID of the approval request
decided_by: Who denied
reason: Denial reason
Returns:
True if denial was recorded
"""
response = ApprovalResponse(
request_id=request_id,
status=ApprovalStatus.DENIED,
decided_by=decided_by,
reason=reason,
)
success = await self._queue.complete(response)
if success:
logger.info(
"Approval denied: %s by %s - %s",
request_id,
decided_by,
reason,
)
await self._notify_handlers("approval_denied", response)
return success
async def cancel(self, request_id: str) -> bool:
"""
Cancel a pending request.
Args:
request_id: ID of the approval request
Returns:
True if request was cancelled
"""
success = await self._queue.cancel(request_id)
if success:
logger.info("Approval request cancelled: %s", request_id)
return success
async def list_pending(self) -> list[ApprovalRequest]:
"""List all pending approval requests."""
return await self._queue.list_pending()
async def get_request(self, request_id: str) -> ApprovalRequest | None:
"""Get an approval request by ID."""
return await self._queue.get_pending(request_id)
def add_notification_handler(
self,
handler: Callable[..., Any],
) -> None:
"""Add a notification handler."""
self._notification_handlers.append(handler)
def remove_notification_handler(
self,
handler: Callable[..., Any],
) -> None:
"""Remove a notification handler."""
if handler in self._notification_handlers:
self._notification_handlers.remove(handler)
async def _notify_handlers(
self,
event_type: str,
data: Any,
) -> None:
"""Notify all handlers of an event."""
for handler in self._notification_handlers:
try:
if asyncio.iscoroutinefunction(handler):
await handler(event_type, data)
else:
handler(event_type, data)
except Exception as e:
logger.error("Error in notification handler: %s", e)
async def _periodic_cleanup(self) -> None:
"""Background task for cleaning up expired requests."""
while self._running:
try:
await asyncio.sleep(30) # Check every 30 seconds
count = await self._queue.cleanup_expired()
if count:
logger.debug("Cleaned up %d expired approval requests", count)
except asyncio.CancelledError:
break
except Exception as e:
logger.error("Error in approval cleanup: %s", e)