forked from cardosofelipe/fast-next-template
Core safety framework architecture for autonomous agent guardrails: **Core Components:** - SafetyGuardian: Main orchestrator for all safety checks - AuditLogger: Comprehensive audit logging with hash chain tamper detection - SafetyConfig: Pydantic-based configuration - Models: Action requests, validation results, policies, checkpoints **Exception Hierarchy:** - SafetyError base with context preservation - Permission, Budget, RateLimit, Loop errors - Approval workflow errors (Required, Denied, Timeout) - Rollback, Sandbox, Emergency exceptions **Safety Policy System:** - Autonomy level based policies (FULL_CONTROL, MILESTONE, AUTONOMOUS) - Cost limits, rate limits, permission patterns - HITL approval requirements per action type - Configurable loop detection thresholds **Directory Structure:** - validation/, costs/, limits/, loops/ - Control subsystems - permissions/, rollback/, hitl/ - Access and recovery - content/, sandbox/, emergency/ - Protection systems - audit/, policies/ - Logging and configuration Phase A establishes the architecture. Subsystems to be implemented in Phase B-C. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
475 lines
18 KiB
Python
475 lines
18 KiB
Python
"""
Safety Framework Models

Core Pydantic models for actions, events, policies, and safety decisions.
"""
|
|
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any
|
|
from uuid import uuid4
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
# ============================================================================
# Enums
# ============================================================================
|
|
|
|
|
|
class ActionType(str, Enum):
    """Kinds of action that can be performed.

    The enum mixes in ``str``, so every member compares equal to its raw
    string value (e.g. ``ActionType.TOOL_CALL == "tool_call"``).
    """

    # Generic tool invocation
    TOOL_CALL = "tool_call"

    # File operations
    FILE_READ = "file_read"
    FILE_WRITE = "file_write"
    FILE_DELETE = "file_delete"

    # External calls and data stores
    API_CALL = "api_call"
    DATABASE_QUERY = "database_query"
    DATABASE_MUTATE = "database_mutate"

    # Version control, shell, LLM and network
    GIT_OPERATION = "git_operation"
    SHELL_COMMAND = "shell_command"
    LLM_CALL = "llm_call"
    NETWORK_REQUEST = "network_request"

    # Anything not covered above
    CUSTOM = "custom"
|
|
|
|
|
|
class ResourceType(str, Enum):
    """Kinds of resource that can be accessed.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    FILE = "file"
    DATABASE = "database"
    API = "api"
    NETWORK = "network"
    GIT = "git"
    SHELL = "shell"
    LLM = "llm"
    MEMORY = "memory"
    # Fallback for resources that fit none of the categories above
    CUSTOM = "custom"
|
|
|
|
|
|
class PermissionLevel(str, Enum):
    """Permission levels for resource access.

    Members are ``str`` subclasses and compare equal to their string value.
    The enum itself defines no ordering between levels.
    """

    NONE = "none"
    READ = "read"
    WRITE = "write"
    EXECUTE = "execute"
    DELETE = "delete"
    ADMIN = "admin"
|
|
|
|
|
|
class AutonomyLevel(str, Enum):
    """Autonomy levels for agent operation.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    # Approve every action
    FULL_CONTROL = "full_control"
    # Approve at milestones
    MILESTONE = "milestone"
    # Only major decisions
    AUTONOMOUS = "autonomous"
|
|
|
|
|
|
class SafetyDecision(str, Enum):
    """Possible outcomes of safety validation.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    ALLOW = "allow"
    DENY = "deny"
    REQUIRE_APPROVAL = "require_approval"
    DELAY = "delay"
    SANDBOX = "sandbox"
|
|
|
|
|
|
class ApprovalStatus(str, Enum):
    """Status of an approval request.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    PENDING = "pending"
    APPROVED = "approved"
    DENIED = "denied"
    TIMEOUT = "timeout"
    CANCELLED = "cancelled"
|
|
|
|
|
|
class AuditEventType(str, Enum):
    """Types of audit events.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    # Action lifecycle
    ACTION_REQUESTED = "action_requested"
    ACTION_VALIDATED = "action_validated"
    ACTION_DENIED = "action_denied"
    ACTION_EXECUTED = "action_executed"
    ACTION_FAILED = "action_failed"

    # Approval workflow
    APPROVAL_REQUESTED = "approval_requested"
    APPROVAL_GRANTED = "approval_granted"
    APPROVAL_DENIED = "approval_denied"
    APPROVAL_TIMEOUT = "approval_timeout"

    # Checkpoints and rollback
    CHECKPOINT_CREATED = "checkpoint_created"
    ROLLBACK_STARTED = "rollback_started"
    ROLLBACK_COMPLETED = "rollback_completed"
    ROLLBACK_FAILED = "rollback_failed"

    # Budget and rate limiting
    BUDGET_WARNING = "budget_warning"
    BUDGET_EXCEEDED = "budget_exceeded"
    RATE_LIMITED = "rate_limited"

    # Other safety signals
    LOOP_DETECTED = "loop_detected"
    EMERGENCY_STOP = "emergency_stop"
    POLICY_VIOLATION = "policy_violation"
    CONTENT_FILTERED = "content_filtered"
|
|
|
|
|
|
# ============================================================================
# Action Models
# ============================================================================
|
|
|
|
|
|
class ActionMetadata(BaseModel):
    """Metadata associated with an action.

    Only ``agent_id`` is required. All other identifiers default to ``None``,
    ``autonomy_level`` defaults to ``AutonomyLevel.MILESTONE`` and ``context``
    to a new empty dict per instance.
    """

    # Required: the acting agent
    agent_id: str = Field(..., description="ID of the agent performing the action")

    # Optional identifiers locating the action within a project/session/task
    project_id: str | None = Field(default=None, description="ID of the project context")
    session_id: str | None = Field(default=None, description="ID of the current session")
    task_id: str | None = Field(default=None, description="ID of the current task")
    parent_action_id: str | None = Field(
        default=None, description="ID of the parent action"
    )
    correlation_id: str | None = Field(
        default=None, description="Correlation ID for tracing"
    )
    user_id: str | None = Field(default=None, description="ID of the user who initiated")

    autonomy_level: AutonomyLevel = Field(
        default=AutonomyLevel.MILESTONE,
        description="Current autonomy level",
    )

    # default_factory gives each instance its own dict
    context: dict[str, Any] = Field(
        default_factory=dict,
        description="Additional context",
    )
|
|
|
|
|
|
class ActionRequest(BaseModel):
    """Request to perform an action.

    ``action_type`` and ``metadata`` are required. ``id`` defaults to a fresh
    UUID4 string and ``timestamp`` to the construction time.
    """

    # Generated per instance when not supplied
    id: str = Field(default_factory=lambda: str(uuid4()))

    # What is being done, and to what
    action_type: ActionType = Field(..., description="Type of action to perform")
    tool_name: str | None = Field(default=None, description="Name of the tool to call")
    resource: str | None = Field(default=None, description="Resource being accessed")
    resource_type: ResourceType | None = Field(
        default=None, description="Type of resource"
    )
    arguments: dict[str, Any] = Field(
        default_factory=dict,
        description="Action arguments",
    )
    metadata: ActionMetadata = Field(..., description="Action metadata")

    # Cost estimates (default to zero)
    estimated_cost_tokens: int = Field(default=0, description="Estimated token cost")
    estimated_cost_usd: float = Field(default=0.0, description="Estimated USD cost")

    # Risk flags: not destructive and reversible unless stated otherwise
    is_destructive: bool = Field(
        default=False, description="Whether action is destructive"
    )
    is_reversible: bool = Field(
        default=True, description="Whether action can be rolled back"
    )

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
|
|
|
|
class ActionResult(BaseModel):
    """Result of an executed action.

    ``action_id`` and ``success`` are required; the remaining fields default
    to ``None`` or zero, and ``timestamp`` to the construction time.
    """

    action_id: str = Field(..., description="ID of the action")
    success: bool = Field(..., description="Whether action succeeded")

    # Payload on success; error details on failure
    data: Any = Field(default=None, description="Action result data")
    error: str | None = Field(default=None, description="Error message if failed")
    error_code: str | None = Field(default=None, description="Error code if failed")

    # Measured execution time and cost
    execution_time_ms: float = Field(default=0.0, description="Execution time in ms")
    actual_cost_tokens: int = Field(default=0, description="Actual token cost")
    actual_cost_usd: float = Field(default=0.0, description="Actual USD cost")

    checkpoint_id: str | None = Field(
        default=None, description="Checkpoint ID if created"
    )

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
|
|
|
|
# ============================================================================
# Validation Models
# ============================================================================
|
|
|
|
|
|
class ValidationRule(BaseModel):
    """A single validation rule.

    ``name`` and ``decision`` are required. Each condition list defaults to
    ``None``; how ``None`` is interpreted is up to the code that evaluates
    the rule, which is not part of this model.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    name: str = Field(..., description="Rule name")
    description: str | None = Field(default=None, description="Rule description")
    priority: int = Field(
        default=0, description="Rule priority (higher = evaluated first)"
    )
    enabled: bool = Field(default=True, description="Whether rule is enabled")

    # Rule conditions
    action_types: list[ActionType] | None = Field(
        default=None, description="Action types this rule applies to"
    )
    tool_patterns: list[str] | None = Field(
        default=None, description="Tool name patterns (supports wildcards)"
    )
    resource_patterns: list[str] | None = Field(
        default=None, description="Resource patterns (supports wildcards)"
    )
    agent_ids: list[str] | None = Field(
        default=None, description="Agent IDs this rule applies to"
    )

    # Rule decision
    decision: SafetyDecision = Field(..., description="Decision when rule matches")
    reason: str | None = Field(default=None, description="Reason for decision")
|
|
|
|
|
|
class ValidationResult(BaseModel):
    """Result of action validation.

    ``action_id`` and ``decision`` are required. The list fields default to
    new empty lists and ``timestamp`` to the construction time.
    """

    action_id: str = Field(..., description="ID of the validated action")
    decision: SafetyDecision = Field(..., description="Validation decision")

    # Which rules produced the decision, and why
    applied_rules: list[str] = Field(
        default_factory=list, description="IDs of applied rules"
    )
    reasons: list[str] = Field(
        default_factory=list, description="Reasons for decision"
    )

    # Optional follow-up information
    approval_id: str | None = Field(
        default=None, description="Approval request ID if needed"
    )
    retry_after_seconds: float | None = Field(
        default=None, description="Retry delay if rate limited"
    )

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
|
|
|
|
# ============================================================================
# Budget Models
# ============================================================================
|
|
|
|
|
|
class BudgetScope(str, Enum):
    """Scope of a budget limit.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    # Time- or session-bounded scopes
    SESSION = "session"
    DAILY = "daily"
    WEEKLY = "weekly"
    MONTHLY = "monthly"

    # Entity-bounded scopes
    PROJECT = "project"
    AGENT = "agent"
|
|
|
|
|
|
class BudgetStatus(BaseModel):
    """Current budget status.

    ``scope`` and ``scope_id`` are required. The remaining/flag fields are
    plain stored values with defaults; nothing in this model derives them
    from the usage and limit fields, so the producer must set them.
    """

    scope: BudgetScope = Field(..., description="Budget scope")
    scope_id: str = Field(..., description="ID within scope (session/agent/project)")

    # Token usage against its limit
    tokens_used: int = Field(default=0, description="Tokens used in this scope")
    tokens_limit: int = Field(default=100000, description="Token limit for this scope")

    # USD usage against its limit
    cost_used_usd: float = Field(default=0.0, description="USD spent in this scope")
    cost_limit_usd: float = Field(default=10.0, description="USD limit for this scope")

    # Remaining amounts (stored, not computed here)
    tokens_remaining: int = Field(default=0, description="Remaining tokens")
    cost_remaining_usd: float = Field(default=0.0, description="Remaining USD budget")

    # Warning / exceeded state
    warning_threshold: float = Field(
        default=0.8, description="Warn at this usage fraction"
    )
    is_warning: bool = Field(default=False, description="Whether at warning level")
    is_exceeded: bool = Field(default=False, description="Whether budget exceeded")

    reset_at: datetime | None = Field(default=None, description="When budget resets")
|
|
|
|
|
|
# ============================================================================
# Rate Limit Models
# ============================================================================
|
|
|
|
|
|
class RateLimitConfig(BaseModel):
    """Configuration for a rate limit.

    ``name`` and ``limit`` are required. The window defaults to 60 seconds,
    the slowdown threshold to 0.8, and no burst allowance is set by default.
    """

    name: str = Field(..., description="Rate limit name")
    limit: int = Field(..., description="Maximum allowed in window")
    window_seconds: int = Field(default=60, description="Time window in seconds")
    burst_limit: int | None = Field(default=None, description="Burst allowance")
    slowdown_threshold: float = Field(
        default=0.8, description="Start slowing at this fraction"
    )
|
|
|
|
|
|
class RateLimitStatus(BaseModel):
    """Current rate limit status.

    ``name``, ``limit``, ``window_seconds``, ``remaining`` and ``reset_at``
    are required; the other fields have defaults.
    """

    name: str = Field(..., description="Rate limit name")

    # Usage within the current window
    current_count: int = Field(default=0, description="Current count in window")
    limit: int = Field(..., description="Maximum allowed")
    window_seconds: int = Field(..., description="Time window")
    remaining: int = Field(..., description="Remaining in window")
    reset_at: datetime = Field(..., description="When window resets")

    # Throttling state
    is_limited: bool = Field(default=False, description="Whether currently limited")
    retry_after_seconds: float = Field(default=0.0, description="Seconds until retry")
|
|
|
|
|
|
# ============================================================================
# Approval Models
# ============================================================================
|
|
|
|
|
|
class ApprovalRequest(BaseModel):
    """Request for human approval.

    ``action`` and ``reason`` are required. ``id`` defaults to a fresh UUID4
    string and ``created_at`` to the construction time. ``expires_at``
    defaults to ``None``; this model does not derive it from
    ``timeout_seconds``.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    action: ActionRequest = Field(..., description="Action requiring approval")
    reason: str = Field(..., description="Why approval is required")
    urgency: str = Field(default="normal", description="Urgency level")

    # Timing
    timeout_seconds: int = Field(default=300, description="Timeout for approval")
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    expires_at: datetime | None = Field(default=None, description="When request expires")

    # Extra information for the approver
    suggested_action: str | None = Field(
        default=None, description="Suggested response"
    )
    context: dict[str, Any] = Field(default_factory=dict, description="Extra context")
|
|
|
|
|
|
class ApprovalResponse(BaseModel):
    """Response to an approval request.

    ``request_id`` and ``status`` are required; ``decided_at`` defaults to
    the construction time.
    """

    request_id: str = Field(..., description="ID of the approval request")
    status: ApprovalStatus = Field(..., description="Approval status")
    decided_by: str | None = Field(default=None, description="Who made the decision")
    reason: str | None = Field(default=None, description="Reason for decision")
    modifications: dict[str, Any] | None = Field(
        default=None, description="Modifications to action"
    )

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    decided_at: datetime = Field(default_factory=datetime.utcnow)
|
|
|
|
|
|
# ============================================================================
# Checkpoint/Rollback Models
# ============================================================================
|
|
|
|
|
|
class CheckpointType(str, Enum):
    """Types of checkpoints.

    Members are ``str`` subclasses and compare equal to their string value.
    """

    FILE = "file"
    DATABASE = "database"
    GIT = "git"
    COMPOSITE = "composite"
|
|
|
|
|
|
class Checkpoint(BaseModel):
    """A rollback checkpoint.

    ``checkpoint_type`` and ``action_id`` are required. ``id`` defaults to a
    fresh UUID4 string and ``created_at`` to the construction time.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    checkpoint_type: CheckpointType = Field(..., description="Type of checkpoint")
    action_id: str = Field(..., description="Action this checkpoint is for")

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    expires_at: datetime | None = Field(
        default=None, description="When checkpoint expires"
    )

    # Free-form payload; its schema is not defined by this model
    data: dict[str, Any] = Field(default_factory=dict, description="Checkpoint data")
    description: str | None = Field(
        default=None, description="Description of checkpoint"
    )
    is_valid: bool = Field(
        default=True, description="Whether checkpoint is still valid"
    )
|
|
|
|
|
|
class RollbackResult(BaseModel):
    """Result of a rollback operation.

    ``checkpoint_id`` and ``success`` are required. Both action lists default
    to new empty lists and ``timestamp`` to the construction time.
    """

    checkpoint_id: str = Field(..., description="ID of checkpoint rolled back to")
    success: bool = Field(..., description="Whether rollback succeeded")

    # Per-action outcome
    actions_rolled_back: list[str] = Field(
        default_factory=list, description="IDs of rolled back actions"
    )
    failed_actions: list[str] = Field(
        default_factory=list, description="IDs of actions that failed to rollback"
    )

    error: str | None = Field(default=None, description="Error message if failed")

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
|
|
|
|
# ============================================================================
# Audit Models
# ============================================================================
|
|
|
|
|
|
class AuditEvent(BaseModel):
    """An audit log event.

    Only ``event_type`` is required. ``id`` defaults to a fresh UUID4 string,
    ``timestamp`` to the construction time, and every identifier to ``None``.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    event_type: AuditEventType = Field(..., description="Type of audit event")

    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept so existing naive-datetime comparisons still work.
    timestamp: datetime = Field(default_factory=datetime.utcnow)

    # Optional identifiers tying the event to its origin
    agent_id: str | None = Field(default=None, description="Agent ID if applicable")
    action_id: str | None = Field(default=None, description="Action ID if applicable")
    project_id: str | None = Field(default=None, description="Project ID if applicable")
    session_id: str | None = Field(default=None, description="Session ID if applicable")
    user_id: str | None = Field(default=None, description="User ID if applicable")

    decision: SafetyDecision | None = Field(default=None, description="Safety decision")
    details: dict[str, Any] = Field(default_factory=dict, description="Event details")
    correlation_id: str | None = Field(
        default=None, description="Correlation ID for tracing"
    )
|
|
|
|
|
|
# ============================================================================
# Policy Models
# ============================================================================
|
|
|
|
|
|
class SafetyPolicy(BaseModel):
    """A complete safety policy configuration.

    Only ``name`` is required; every other setting has a default. This model
    only stores the settings. Enforcement (pattern matching, limits, approval
    gating) happens elsewhere and is not defined here.
    """

    name: str = Field(..., description="Policy name")
    description: str | None = Field(default=None, description="Policy description")
    version: str = Field(default="1.0.0", description="Policy version")
    enabled: bool = Field(default=True, description="Whether policy is enabled")

    # Cost controls
    max_tokens_per_session: int = Field(
        default=100_000, description="Max tokens per session"
    )
    max_tokens_per_day: int = Field(default=1_000_000, description="Max tokens per day")
    max_cost_per_session_usd: float = Field(
        default=10.0, description="Max USD per session"
    )
    max_cost_per_day_usd: float = Field(default=100.0, description="Max USD per day")

    # Rate limits
    max_actions_per_minute: int = Field(default=60, description="Max actions per minute")
    max_llm_calls_per_minute: int = Field(
        default=20, description="Max LLM calls per minute"
    )
    max_file_operations_per_minute: int = Field(
        default=100, description="Max file ops per minute"
    )

    # Permissions (factories give each instance its own list)
    allowed_tools: list[str] = Field(
        default_factory=lambda: ["*"],
        description="Allowed tool patterns",
    )
    denied_tools: list[str] = Field(
        default_factory=list,
        description="Denied tool patterns",
    )
    allowed_file_patterns: list[str] = Field(
        default_factory=lambda: ["**/*"],
        description="Allowed file patterns",
    )
    denied_file_patterns: list[str] = Field(
        default_factory=lambda: ["**/.env", "**/secrets/**"],
        description="Denied file patterns",
    )

    # HITL
    require_approval_for: list[str] = Field(
        default_factory=lambda: [
            "delete_file",
            "push_to_remote",
            "deploy_to_production",
            "modify_critical_config",
        ],
        description="Actions requiring approval",
    )

    # Loop detection
    max_repeated_actions: int = Field(default=5, description="Max exact repetitions")
    max_similar_actions: int = Field(default=10, description="Max similar actions")

    # Sandbox
    require_sandbox: bool = Field(
        default=False, description="Require sandbox execution"
    )
    sandbox_timeout_seconds: int = Field(default=300, description="Sandbox timeout")
    sandbox_memory_mb: int = Field(default=1024, description="Sandbox memory limit")

    # Validation rules
    validation_rules: list[ValidationRule] = Field(
        default_factory=list,
        description="Custom validation rules",
    )
|
|
|
|
|
|
# ============================================================================
# Guardian Result Models
# ============================================================================
|
|
|
|
|
|
class GuardianResult(BaseModel):
    """Result of SafetyGuardian evaluation.

    ``action_id``, ``allowed`` and ``decision`` are required. ``allowed`` is
    stored independently of ``decision``; this model does not enforce that
    the two agree.
    """

    action_id: str = Field(..., description="ID of the action")
    allowed: bool = Field(..., description="Whether action is allowed")
    decision: SafetyDecision = Field(..., description="Safety decision")
    reasons: list[str] = Field(default_factory=list, description="Decision reasons")

    # Optional references to follow-up artefacts
    approval_id: str | None = Field(default=None, description="Approval ID if needed")
    checkpoint_id: str | None = Field(
        default=None, description="Checkpoint ID if created"
    )
    retry_after_seconds: float | None = Field(default=None, description="Retry delay")
    modified_action: ActionRequest | None = Field(
        default=None, description="Modified action if changed"
    )

    audit_events: list[AuditEvent] = Field(
        default_factory=list, description="Generated audit events"
    )
|