feat(backend): add safety framework foundation (Phase A) (#63)

Core safety framework architecture for autonomous agent guardrails:

**Core Components:**
- SafetyGuardian: Main orchestrator for all safety checks
- AuditLogger: Comprehensive audit logging with hash chain tamper detection
- SafetyConfig: Pydantic-based configuration
- Models: Action requests, validation results, policies, checkpoints

**Exception Hierarchy:**
- SafetyError base with context preservation
- Permission, Budget, RateLimit, Loop errors
- Approval workflow errors (Required, Denied, Timeout)
- Rollback, Sandbox, Emergency exceptions

**Safety Policy System:**
- Autonomy-level-based policies (FULL_CONTROL, MILESTONE, AUTONOMOUS)
- Cost limits, rate limits, permission patterns
- HITL approval requirements per action type
- Configurable loop detection thresholds

**Directory Structure:**
- validation/, costs/, limits/, loops/ - Control subsystems
- permissions/, rollback/, hitl/ - Access and recovery
- content/, sandbox/, emergency/ - Protection systems
- audit/, policies/ - Logging and configuration

Phase A establishes the architecture. Subsystems are to be implemented in Phases B-C.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 11:22:25 +01:00
parent e5975fa5d0
commit 498c0a0e94
18 changed files with 2450 additions and 0 deletions
--- a/backend/app/services/safety/__init__.py
+++ b/backend/app/services/safety/__init__.py
@@ -0,0 +1,170 @@
+"""
+Safety and Guardrails Framework
+
+Comprehensive safety framework for autonomous agent operation.
+Provides multi-layered protection including:
+- Pre-execution validation
+- Cost and budget controls
+- Rate limiting
+- Loop detection and prevention
+- Human-in-the-loop approval
+- Rollback and checkpointing
+- Content filtering
+- Sandboxed execution
+- Emergency controls
+- Complete audit trail
+
+Usage:
+    from app.services.safety import get_safety_guardian, SafetyGuardian
+
+    guardian = await get_safety_guardian()
+    result = await guardian.validate(action_request)
+
+    if result.allowed:
+        # Execute action
+        pass
+    else:
+        # Handle denial
+        print(f"Action denied: {result.reasons}")
+"""
+
+# Public re-exports: one import statement per submodule.
+# Audit
+from .audit import (
+    AuditLogger,
+    get_audit_logger,
+    reset_audit_logger,
+    shutdown_audit_logger,
+)
+
+# Configuration
+from .config import (
+    AutonomyConfig,
+    SafetyConfig,
+    get_autonomy_config,
+    get_default_policy,
+    get_policy_for_autonomy_level,
+    get_safety_config,
+    load_policies_from_directory,
+    load_policy_from_file,
+    reset_config_cache,
+)
+from .exceptions import (
+    ApprovalDeniedError,
+    ApprovalRequiredError,
+    ApprovalTimeoutError,
+    BudgetExceededError,
+    CheckpointError,
+    ContentFilterError,
+    EmergencyStopError,
+    LoopDetectedError,
+    PermissionDeniedError,
+    PolicyViolationError,
+    RateLimitExceededError,
+    RollbackError,
+    SafetyError,
+    SandboxError,
+    SandboxTimeoutError,
+    ValidationError,
+)
+
+# Guardian
+from .guardian import (
+    SafetyGuardian,
+    get_safety_guardian,
+    reset_safety_guardian,
+    shutdown_safety_guardian,
+)
+
+# Models
+from .models import (
+    ActionMetadata,
+    ActionRequest,
+    ActionResult,
+    ActionType,
+    ApprovalRequest,
+    ApprovalResponse,
+    ApprovalStatus,
+    AuditEvent,
+    AuditEventType,
+    AutonomyLevel,
+    BudgetScope,
+    BudgetStatus,
+    Checkpoint,
+    CheckpointType,
+    GuardianResult,
+    PermissionLevel,
+    RateLimitConfig,
+    RateLimitStatus,
+    ResourceType,
+    RollbackResult,
+    SafetyDecision,
+    SafetyPolicy,
+    ValidationResult,
+    ValidationRule,
+)
+
+# Names exported by ``from app.services.safety import *``.
+# The list is one flat run in ASCII order (capitalised names first, then the
+# lowercase names); it is NOT grouped by submodule, so no per-section labels
+# are used here. See the import statements above for the submodule each name
+# is re-exported from.
+__all__ = [
+    "ActionMetadata",
+    "ActionRequest",
+    "ActionResult",
+    "ActionType",
+    "ApprovalDeniedError",
+    "ApprovalRequest",
+    "ApprovalRequiredError",
+    "ApprovalResponse",
+    "ApprovalStatus",
+    "ApprovalTimeoutError",
+    "AuditEvent",
+    "AuditEventType",
+    "AuditLogger",
+    "AutonomyConfig",
+    "AutonomyLevel",
+    "BudgetExceededError",
+    "BudgetScope",
+    "BudgetStatus",
+    "Checkpoint",
+    "CheckpointError",
+    "CheckpointType",
+    "ContentFilterError",
+    "EmergencyStopError",
+    "GuardianResult",
+    "LoopDetectedError",
+    "PermissionDeniedError",
+    "PermissionLevel",
+    "PolicyViolationError",
+    "RateLimitConfig",
+    "RateLimitExceededError",
+    "RateLimitStatus",
+    "ResourceType",
+    "RollbackError",
+    "RollbackResult",
+    "SafetyConfig",
+    "SafetyDecision",
+    "SafetyError",
+    "SafetyGuardian",
+    "SafetyPolicy",
+    "SandboxError",
+    "SandboxTimeoutError",
+    "ValidationError",
+    "ValidationResult",
+    "ValidationRule",
+    "get_audit_logger",
+    "get_autonomy_config",
+    "get_default_policy",
+    "get_policy_for_autonomy_level",
+    "get_safety_config",
+    "get_safety_guardian",
+    "load_policies_from_directory",
+    "load_policy_from_file",
+    "reset_audit_logger",
+    "reset_config_cache",
+    "reset_safety_guardian",
+    "shutdown_audit_logger",
+    "shutdown_safety_guardian",
+]