forked from cardosofelipe/fast-next-template
Core safety framework architecture for autonomous agent guardrails:

**Core Components:**
- SafetyGuardian: Main orchestrator for all safety checks
- AuditLogger: Comprehensive audit logging with hash chain tamper detection
- SafetyConfig: Pydantic-based configuration
- Models: Action requests, validation results, policies, checkpoints

**Exception Hierarchy:**
- SafetyError base with context preservation
- Permission, Budget, RateLimit, Loop errors
- Approval workflow errors (Required, Denied, Timeout)
- Rollback, Sandbox, Emergency exceptions

**Safety Policy System:**
- Autonomy level based policies (FULL_CONTROL, MILESTONE, AUTONOMOUS)
- Cost limits, rate limits, permission patterns
- HITL approval requirements per action type
- Configurable loop detection thresholds

**Directory Structure:**
- validation/, costs/, limits/, loops/ - Control subsystems
- permissions/, rollback/, hitl/ - Access and recovery
- content/, sandbox/, emergency/ - Protection systems
- audit/, policies/ - Logging and configuration

Phase A establishes the architecture. Subsystems to be implemented in Phase B-C.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
278 lines
7.3 KiB
Python
278 lines
7.3 KiB
Python
"""
|
|
Safety Framework Exceptions
|
|
|
|
Custom exception classes for the safety and guardrails framework.
|
|
"""
|
|
|
|
from typing import Any
|
|
|
|
|
|
class SafetyError(Exception):
|
|
"""Base exception for all safety-related errors."""
|
|
|
|
def __init__(
|
|
self,
|
|
message: str,
|
|
*,
|
|
action_id: str | None = None,
|
|
agent_id: str | None = None,
|
|
details: dict[str, Any] | None = None,
|
|
) -> None:
|
|
super().__init__(message)
|
|
self.message = message
|
|
self.action_id = action_id
|
|
self.agent_id = agent_id
|
|
self.details = details or {}
|
|
|
|
|
|
class PermissionDeniedError(SafetyError):
    """Signals that an action was not permitted.

    Attributes:
        action_type: Optional action type supplied by the raiser.
        resource: Optional resource name supplied by the raiser.
        required_permission: Optional permission name supplied by the raiser.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Permission denied",
        *,
        action_type: str | None = None,
        resource: str | None = None,
        required_permission: str | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.action_type, self.resource = action_type, resource
        self.required_permission = required_permission
|
|
|
|
|
|
class BudgetExceededError(SafetyError):
    """Signals that a cost budget has been exceeded.

    Attributes:
        budget_type: Budget label; defaults to ``"session"``.
        current_usage: Usage figure supplied by the raiser; defaults to 0.0.
        budget_limit: Limit figure supplied by the raiser; defaults to 0.0.
        unit: Unit label for the two figures; defaults to ``"tokens"``.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Budget exceeded",
        *,
        budget_type: str = "session",
        current_usage: float = 0.0,
        budget_limit: float = 0.0,
        unit: str = "tokens",
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.budget_type, self.unit = budget_type, unit
        self.current_usage, self.budget_limit = current_usage, budget_limit
|
|
|
|
|
|
class RateLimitExceededError(SafetyError):
    """Signals that a rate limit has been exceeded.

    Attributes:
        limit_type: Limit label; defaults to ``"actions"``.
        limit_value: Limit figure supplied by the raiser; defaults to 0.
        window_seconds: Window length in seconds; defaults to 60.
        retry_after_seconds: Retry delay in seconds; defaults to 0.0.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Rate limit exceeded",
        *,
        limit_type: str = "actions",
        limit_value: int = 0,
        window_seconds: int = 60,
        retry_after_seconds: float = 0.0,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.limit_type, self.limit_value = limit_type, limit_value
        self.window_seconds, self.retry_after_seconds = (
            window_seconds,
            retry_after_seconds,
        )
|
|
|
|
|
|
class LoopDetectedError(SafetyError):
    """Signals that an action loop has been detected.

    Attributes:
        loop_type: Loop label; defaults to ``"exact"``.
        repetition_count: Repetition figure supplied by the raiser;
            defaults to 0.
        action_pattern: List of strings supplied by the raiser. A new empty
            list is used when the argument is ``None`` or empty.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Loop detected",
        *,
        loop_type: str = "exact",
        repetition_count: int = 0,
        action_pattern: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.loop_type, self.repetition_count = loop_type, repetition_count
        # Falsy input (None or empty) becomes a fresh list.
        self.action_pattern = action_pattern if action_pattern else []
|
|
|
|
|
|
class ApprovalRequiredError(SafetyError):
    """Signals that human approval is required.

    Attributes:
        approval_id: Optional approval identifier supplied by the raiser.
        reason: Optional reason text supplied by the raiser.
        timeout_seconds: Timeout in seconds; defaults to 300.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Human approval required",
        *,
        approval_id: str | None = None,
        reason: str | None = None,
        timeout_seconds: int = 300,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.approval_id, self.reason = approval_id, reason
        self.timeout_seconds = timeout_seconds
|
|
|
|
|
|
class ApprovalDeniedError(SafetyError):
    """Signals that a human explicitly denied an action.

    Attributes:
        approval_id: Optional approval identifier supplied by the raiser.
        denied_by: Optional string supplied by the raiser.
        denial_reason: Optional reason text supplied by the raiser.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Approval denied by human",
        *,
        approval_id: str | None = None,
        denied_by: str | None = None,
        denial_reason: str | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.approval_id = approval_id
        self.denied_by, self.denial_reason = denied_by, denial_reason
|
|
|
|
|
|
class ApprovalTimeoutError(SafetyError):
    """Signals that an approval request timed out.

    Attributes:
        approval_id: Optional approval identifier supplied by the raiser.
        timeout_seconds: Timeout in seconds; defaults to 300.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Approval request timed out",
        *,
        approval_id: str | None = None,
        timeout_seconds: int = 300,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.approval_id, self.timeout_seconds = approval_id, timeout_seconds
|
|
|
|
|
|
class RollbackError(SafetyError):
    """Signals that a rollback failed.

    Attributes:
        checkpoint_id: Optional checkpoint identifier supplied by the raiser.
        failed_actions: List of strings supplied by the raiser. A new empty
            list is used when the argument is ``None`` or empty.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Rollback failed",
        *,
        checkpoint_id: str | None = None,
        failed_actions: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.checkpoint_id = checkpoint_id
        # Falsy input (None or empty) becomes a fresh list.
        self.failed_actions = failed_actions if failed_actions else []
|
|
|
|
|
|
class CheckpointError(SafetyError):
    """Signals that checkpoint creation failed.

    Attributes:
        checkpoint_type: Optional checkpoint type supplied by the raiser;
            ``None`` when omitted.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Checkpoint creation failed",
        *,
        checkpoint_type: str | None = None,
        **kwargs: Any,
    ) -> None:
        # Parent handles the message and shared context keywords.
        super().__init__(message, **kwargs)

        self.checkpoint_type = checkpoint_type
|
|
|
|
|
|
class ValidationError(SafetyError):
    """Signals that action validation failed.

    Attributes:
        validation_rules: List of strings supplied by the raiser.
        failed_rules: List of strings supplied by the raiser.

    Each list attribute falls back to a new empty list when its argument is
    ``None`` or empty. Any additional keyword arguments are handed unchanged
    to the parent initializer.
    """

    def __init__(
        self,
        message: str = "Validation failed",
        *,
        validation_rules: list[str] | None = None,
        failed_rules: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.validation_rules = validation_rules if validation_rules else []
        self.failed_rules = failed_rules if failed_rules else []
|
|
|
|
|
|
class ContentFilterError(SafetyError):
    """Signals that content filtering detected prohibited content.

    Attributes:
        filter_type: Optional filter type supplied by the raiser.
        detected_patterns: List of strings supplied by the raiser. A new
            empty list is used when the argument is ``None`` or empty.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Prohibited content detected",
        *,
        filter_type: str | None = None,
        detected_patterns: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.filter_type = filter_type
        # Falsy input (None or empty) becomes a fresh list.
        self.detected_patterns = detected_patterns if detected_patterns else []
|
|
|
|
|
|
class SandboxError(SafetyError):
    """Signals that sandbox execution failed.

    Attributes:
        exit_code: Optional integer exit code supplied by the raiser.
        stderr: Optional stderr text supplied by the raiser.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Sandbox execution failed",
        *,
        exit_code: int | None = None,
        stderr: str | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.exit_code, self.stderr = exit_code, stderr
|
|
|
|
|
|
class SandboxTimeoutError(SandboxError):
    """Signals that sandbox execution timed out.

    Attributes:
        timeout_seconds: Timeout in seconds; defaults to 300.

    Any additional keyword arguments are handed unchanged to the
    ``SandboxError`` initializer.
    """

    def __init__(
        self,
        message: str = "Sandbox execution timed out",
        *,
        timeout_seconds: int = 300,
        **kwargs: Any,
    ) -> None:
        # Parent handles the message and the remaining keywords.
        super().__init__(message, **kwargs)

        self.timeout_seconds = timeout_seconds
|
|
|
|
|
|
class EmergencyStopError(SafetyError):
    """Signals that an emergency stop was triggered.

    Attributes:
        stop_type: Stop label; defaults to ``"kill"``.
        triggered_by: Optional string supplied by the raiser.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Emergency stop triggered",
        *,
        stop_type: str = "kill",
        triggered_by: str | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.stop_type, self.triggered_by = stop_type, triggered_by
|
|
|
|
|
|
class PolicyViolationError(SafetyError):
    """Signals that an action violated a safety policy.

    Attributes:
        policy_name: Optional policy name supplied by the raiser.
        violated_rules: List of strings supplied by the raiser. A new empty
            list is used when the argument is ``None`` or empty.

    Any additional keyword arguments are handed unchanged to the parent
    initializer.
    """

    def __init__(
        self,
        message: str = "Policy violation",
        *,
        policy_name: str | None = None,
        violated_rules: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(message, **kwargs)
        self.policy_name = policy_name
        # Falsy input (None or empty) becomes a fresh list.
        self.violated_rules = violated_rules if violated_rules else []
|