forked from cardosofelipe/fast-next-template
feat(backend): add safety framework foundation (Phase A) (#63)
Core safety framework architecture for autonomous agent guardrails:

**Core Components:**
- SafetyGuardian: Main orchestrator for all safety checks
- AuditLogger: Comprehensive audit logging with hash chain tamper detection
- SafetyConfig: Pydantic-based configuration
- Models: Action requests, validation results, policies, checkpoints

**Exception Hierarchy:**
- SafetyError base with context preservation
- Permission, Budget, RateLimit, Loop errors
- Approval workflow errors (Required, Denied, Timeout)
- Rollback, Sandbox, Emergency exceptions

**Safety Policy System:**
- Autonomy level based policies (FULL_CONTROL, MILESTONE, AUTONOMOUS)
- Cost limits, rate limits, permission patterns
- HITL approval requirements per action type
- Configurable loop detection thresholds

**Directory Structure:**
- validation/, costs/, limits/, loops/ - Control subsystems
- permissions/, rollback/, hitl/ - Access and recovery
- content/, sandbox/, emergency/ - Protection systems
- audit/, policies/ - Logging and configuration

Phase A establishes the architecture. Subsystems to be implemented in Phase B-C.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
474
backend/app/services/safety/models.py
Normal file
474
backend/app/services/safety/models.py
Normal file
@@ -0,0 +1,474 @@
|
||||
"""
|
||||
Safety Framework Models
|
||||
|
||||
Core Pydantic models for actions, events, policies, and safety decisions.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# ============================================================================
|
||||
# Enums
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class ActionType(str, Enum):
    """Kinds of action an agent may ask to perform.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Generic tool invocation
    TOOL_CALL = "tool_call"

    # File system
    FILE_READ = "file_read"
    FILE_WRITE = "file_write"
    FILE_DELETE = "file_delete"

    # External API
    API_CALL = "api_call"

    # Database
    DATABASE_QUERY = "database_query"
    DATABASE_MUTATE = "database_mutate"

    # Version control and shell
    GIT_OPERATION = "git_operation"
    SHELL_COMMAND = "shell_command"

    # Model and network
    LLM_CALL = "llm_call"
    NETWORK_REQUEST = "network_request"

    # Anything not covered above
    CUSTOM = "custom"
|
||||
|
||||
|
||||
class ResourceType(str, Enum):
    """Categories of resource an action can touch.

    Members are also ``str`` instances and compare equal to their values.
    """

    FILE = "file"
    DATABASE = "database"
    API = "api"
    NETWORK = "network"
    GIT = "git"
    SHELL = "shell"
    LLM = "llm"
    MEMORY = "memory"

    # Anything not covered above
    CUSTOM = "custom"
|
||||
|
||||
|
||||
class PermissionLevel(str, Enum):
    """Levels of access that can be granted on a resource.

    Members are also ``str`` instances and compare equal to their values.
    """

    NONE = "none"
    READ = "read"
    WRITE = "write"
    EXECUTE = "execute"
    DELETE = "delete"
    ADMIN = "admin"
|
||||
|
||||
|
||||
class AutonomyLevel(str, Enum):
    """How much an agent may do without human sign-off."""

    # Every action is approved
    FULL_CONTROL = "full_control"
    # Approval happens at milestones
    MILESTONE = "milestone"
    # Approval only for major decisions
    AUTONOMOUS = "autonomous"
|
||||
|
||||
|
||||
class SafetyDecision(str, Enum):
    """Possible outcomes of a safety validation.

    Members are also ``str`` instances and compare equal to their values.
    """

    ALLOW = "allow"
    DENY = "deny"
    REQUIRE_APPROVAL = "require_approval"
    DELAY = "delay"
    SANDBOX = "sandbox"
|
||||
|
||||
|
||||
class ApprovalStatus(str, Enum):
    """States an approval request can be in.

    Members are also ``str`` instances and compare equal to their values.
    """

    PENDING = "pending"
    APPROVED = "approved"
    DENIED = "denied"
    TIMEOUT = "timeout"
    CANCELLED = "cancelled"
|
||||
|
||||
|
||||
class AuditEventType(str, Enum):
    """Kinds of event that can be written to the audit log.

    Members are also ``str`` instances and compare equal to their values.
    """

    # Action lifecycle
    ACTION_REQUESTED = "action_requested"
    ACTION_VALIDATED = "action_validated"
    ACTION_DENIED = "action_denied"
    ACTION_EXECUTED = "action_executed"
    ACTION_FAILED = "action_failed"

    # Approval workflow
    APPROVAL_REQUESTED = "approval_requested"
    APPROVAL_GRANTED = "approval_granted"
    APPROVAL_DENIED = "approval_denied"
    APPROVAL_TIMEOUT = "approval_timeout"

    # Checkpoints and rollback
    CHECKPOINT_CREATED = "checkpoint_created"
    ROLLBACK_STARTED = "rollback_started"
    ROLLBACK_COMPLETED = "rollback_completed"
    ROLLBACK_FAILED = "rollback_failed"

    # Budget and rate limiting
    BUDGET_WARNING = "budget_warning"
    BUDGET_EXCEEDED = "budget_exceeded"
    RATE_LIMITED = "rate_limited"

    # Other safety signals
    LOOP_DETECTED = "loop_detected"
    EMERGENCY_STOP = "emergency_stop"
    POLICY_VIOLATION = "policy_violation"
    CONTENT_FILTERED = "content_filtered"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Action Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class ActionMetadata(BaseModel):
    """Context describing who is performing an action and where.

    Only ``agent_id`` is required. The other identifiers default to ``None``,
    ``autonomy_level`` defaults to ``AutonomyLevel.MILESTONE`` and ``context``
    defaults to a fresh empty dict per instance.
    """

    # Identifiers
    agent_id: str = Field(..., description="ID of the agent performing the action")
    project_id: str | None = Field(
        default=None, description="ID of the project context"
    )
    session_id: str | None = Field(
        default=None, description="ID of the current session"
    )
    task_id: str | None = Field(default=None, description="ID of the current task")
    parent_action_id: str | None = Field(
        default=None, description="ID of the parent action"
    )
    correlation_id: str | None = Field(
        default=None, description="Correlation ID for tracing"
    )
    user_id: str | None = Field(
        default=None, description="ID of the user who initiated"
    )

    # Operating mode and free-form extras
    autonomy_level: AutonomyLevel = Field(
        default=AutonomyLevel.MILESTONE, description="Current autonomy level"
    )
    context: dict[str, Any] = Field(
        default_factory=dict, description="Additional context"
    )
|
||||
|
||||
|
||||
class ActionRequest(BaseModel):
    """Request to perform an action.

    ``action_type`` and ``metadata`` are required. ``id`` defaults to a new
    UUID4 string and ``timestamp`` to the current time as a naive UTC
    ``datetime``.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))

    # What is being done, and to what
    action_type: ActionType = Field(..., description="Type of action to perform")
    tool_name: str | None = Field(None, description="Name of the tool to call")
    resource: str | None = Field(None, description="Resource being accessed")
    resource_type: ResourceType | None = Field(None, description="Type of resource")
    arguments: dict[str, Any] = Field(
        default_factory=dict,
        description="Action arguments",
    )
    metadata: ActionMetadata = Field(..., description="Action metadata")

    # Cost estimates
    estimated_cost_tokens: int = Field(0, description="Estimated token cost")
    estimated_cost_usd: float = Field(0.0, description="Estimated USD cost")

    # Risk flags
    is_destructive: bool = Field(False, description="Whether action is destructive")
    is_reversible: bool = Field(True, description="Whether action can be rolled back")

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
|
||||
|
||||
|
||||
class ActionResult(BaseModel):
    """Result of an executed action.

    ``action_id`` and ``success`` are required. ``timestamp`` defaults to the
    current time as a naive UTC ``datetime``.
    """

    action_id: str = Field(..., description="ID of the action")
    success: bool = Field(..., description="Whether action succeeded")
    data: Any = Field(None, description="Action result data")

    # Failure details
    error: str | None = Field(None, description="Error message if failed")
    error_code: str | None = Field(None, description="Error code if failed")

    # Measured cost and duration
    execution_time_ms: float = Field(0.0, description="Execution time in ms")
    actual_cost_tokens: int = Field(0, description="Actual token cost")
    actual_cost_usd: float = Field(0.0, description="Actual USD cost")

    checkpoint_id: str | None = Field(None, description="Checkpoint ID if created")

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Validation Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class ValidationRule(BaseModel):
    """One validation rule: optional match conditions plus a decision.

    ``name`` and ``decision`` are required. ``id`` defaults to a new UUID4
    string. Every condition field defaults to ``None``.
    """

    # Identity and ordering
    id: str = Field(default_factory=lambda: str(uuid4()))
    name: str = Field(..., description="Rule name")
    description: str | None = Field(default=None, description="Rule description")
    priority: int = Field(
        default=0, description="Rule priority (higher = evaluated first)"
    )
    enabled: bool = Field(default=True, description="Whether rule is enabled")

    # Conditions
    action_types: list[ActionType] | None = Field(
        default=None, description="Action types this rule applies to"
    )
    tool_patterns: list[str] | None = Field(
        default=None, description="Tool name patterns (supports wildcards)"
    )
    resource_patterns: list[str] | None = Field(
        default=None, description="Resource patterns (supports wildcards)"
    )
    agent_ids: list[str] | None = Field(
        default=None, description="Agent IDs this rule applies to"
    )

    # Outcome
    decision: SafetyDecision = Field(..., description="Decision when rule matches")
    reason: str | None = Field(default=None, description="Reason for decision")
|
||||
|
||||
|
||||
class ValidationResult(BaseModel):
    """Result of action validation.

    ``action_id`` and ``decision`` are required. The list fields default to
    fresh empty lists and ``timestamp`` to the current time as a naive UTC
    ``datetime``.
    """

    action_id: str = Field(..., description="ID of the validated action")
    decision: SafetyDecision = Field(..., description="Validation decision")
    applied_rules: list[str] = Field(
        default_factory=list, description="IDs of applied rules"
    )
    reasons: list[str] = Field(
        default_factory=list, description="Reasons for decision"
    )
    approval_id: str | None = Field(None, description="Approval request ID if needed")
    retry_after_seconds: float | None = Field(
        None, description="Retry delay if rate limited"
    )

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Budget Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class BudgetScope(str, Enum):
    """Scopes over which a budget limit can be tracked.

    Members are also ``str`` instances and compare equal to their values.
    """

    SESSION = "session"

    # Calendar periods
    DAILY = "daily"
    WEEKLY = "weekly"
    MONTHLY = "monthly"

    # Entities
    PROJECT = "project"
    AGENT = "agent"
|
||||
|
||||
|
||||
class BudgetStatus(BaseModel):
    """Snapshot of budget usage for one scope.

    ``scope`` and ``scope_id`` are required. The remaining-amount fields and
    the ``is_warning`` / ``is_exceeded`` flags are plain stored values with
    their own defaults; this model does not derive them from the usage and
    limit fields.
    """

    # Which budget this is
    scope: BudgetScope = Field(..., description="Budget scope")
    scope_id: str = Field(..., description="ID within scope (session/agent/project)")

    # Token accounting
    tokens_used: int = Field(default=0, description="Tokens used in this scope")
    tokens_limit: int = Field(
        default=100000, description="Token limit for this scope"
    )

    # USD accounting
    cost_used_usd: float = Field(default=0.0, description="USD spent in this scope")
    cost_limit_usd: float = Field(
        default=10.0, description="USD limit for this scope"
    )

    # Remaining amounts
    tokens_remaining: int = Field(default=0, description="Remaining tokens")
    cost_remaining_usd: float = Field(
        default=0.0, description="Remaining USD budget"
    )

    # Threshold and state flags
    warning_threshold: float = Field(
        default=0.8, description="Warn at this usage fraction"
    )
    is_warning: bool = Field(default=False, description="Whether at warning level")
    is_exceeded: bool = Field(default=False, description="Whether budget exceeded")

    reset_at: datetime | None = Field(default=None, description="When budget resets")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Rate Limit Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class RateLimitConfig(BaseModel):
    """Settings that define one rate limit.

    ``name`` and ``limit`` are required. The window defaults to 60 seconds.
    """

    name: str = Field(..., description="Rate limit name")
    limit: int = Field(..., description="Maximum allowed in window")
    window_seconds: int = Field(default=60, description="Time window in seconds")
    burst_limit: int | None = Field(default=None, description="Burst allowance")
    slowdown_threshold: float = Field(
        default=0.8, description="Start slowing at this fraction"
    )
|
||||
|
||||
|
||||
class RateLimitStatus(BaseModel):
    """Snapshot of one rate limit's current state.

    ``name``, ``limit``, ``window_seconds``, ``remaining`` and ``reset_at``
    are required. The other fields have defaults.
    """

    name: str = Field(..., description="Rate limit name")

    # Window accounting
    current_count: int = Field(default=0, description="Current count in window")
    limit: int = Field(..., description="Maximum allowed")
    window_seconds: int = Field(..., description="Time window")
    remaining: int = Field(..., description="Remaining in window")
    reset_at: datetime = Field(..., description="When window resets")

    # Throttling state
    is_limited: bool = Field(default=False, description="Whether currently limited")
    retry_after_seconds: float = Field(
        default=0.0, description="Seconds until retry"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Approval Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class ApprovalRequest(BaseModel):
    """Request for human approval.

    ``action`` and ``reason`` are required. ``id`` defaults to a new UUID4
    string and ``created_at`` to the current time as a naive UTC ``datetime``.
    ``expires_at`` defaults to ``None``; this model does not compute it from
    ``timeout_seconds``.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    action: ActionRequest = Field(..., description="Action requiring approval")
    reason: str = Field(..., description="Why approval is required")
    urgency: str = Field("normal", description="Urgency level")
    timeout_seconds: int = Field(300, description="Timeout for approval")

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    expires_at: datetime | None = Field(None, description="When request expires")

    suggested_action: str | None = Field(None, description="Suggested response")
    context: dict[str, Any] = Field(default_factory=dict, description="Extra context")
|
||||
|
||||
|
||||
class ApprovalResponse(BaseModel):
    """Response to an approval request.

    ``request_id`` and ``status`` are required. ``decided_at`` defaults to
    the current time as a naive UTC ``datetime``.
    """

    request_id: str = Field(..., description="ID of the approval request")
    status: ApprovalStatus = Field(..., description="Approval status")
    decided_by: str | None = Field(None, description="Who made the decision")
    reason: str | None = Field(None, description="Reason for decision")
    modifications: dict[str, Any] | None = Field(
        None, description="Modifications to action"
    )

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    decided_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Checkpoint/Rollback Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class CheckpointType(str, Enum):
    """Kinds of rollback checkpoint.

    Members are also ``str`` instances and compare equal to their values.
    """

    FILE = "file"
    DATABASE = "database"
    GIT = "git"
    COMPOSITE = "composite"
|
||||
|
||||
|
||||
class Checkpoint(BaseModel):
    """A rollback checkpoint.

    ``checkpoint_type`` and ``action_id`` are required. ``id`` defaults to a
    new UUID4 string and ``created_at`` to the current time as a naive UTC
    ``datetime``.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    checkpoint_type: CheckpointType = Field(..., description="Type of checkpoint")
    action_id: str = Field(..., description="Action this checkpoint is for")

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    expires_at: datetime | None = Field(None, description="When checkpoint expires")

    data: dict[str, Any] = Field(default_factory=dict, description="Checkpoint data")
    description: str | None = Field(None, description="Description of checkpoint")
    is_valid: bool = Field(True, description="Whether checkpoint is still valid")
|
||||
|
||||
|
||||
class RollbackResult(BaseModel):
    """Result of a rollback operation.

    ``checkpoint_id`` and ``success`` are required. The list fields default
    to fresh empty lists and ``timestamp`` to the current time as a naive UTC
    ``datetime``.
    """

    checkpoint_id: str = Field(..., description="ID of checkpoint rolled back to")
    success: bool = Field(..., description="Whether rollback succeeded")
    actions_rolled_back: list[str] = Field(
        default_factory=list, description="IDs of rolled back actions"
    )
    failed_actions: list[str] = Field(
        default_factory=list, description="IDs of actions that failed to rollback"
    )
    error: str | None = Field(None, description="Error message if failed")

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Audit Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class AuditEvent(BaseModel):
    """An audit log event.

    Only ``event_type`` is required. ``id`` defaults to a new UUID4 string
    and ``timestamp`` to the current time as a naive UTC ``datetime``. All
    identifier fields default to ``None``.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    event_type: AuditEventType = Field(..., description="Type of audit event")

    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive-UTC value without the deprecation warning.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )

    # Identifiers linking the event to its origin
    agent_id: str | None = Field(None, description="Agent ID if applicable")
    action_id: str | None = Field(None, description="Action ID if applicable")
    project_id: str | None = Field(None, description="Project ID if applicable")
    session_id: str | None = Field(None, description="Session ID if applicable")
    user_id: str | None = Field(None, description="User ID if applicable")

    decision: SafetyDecision | None = Field(None, description="Safety decision")
    details: dict[str, Any] = Field(default_factory=dict, description="Event details")
    correlation_id: str | None = Field(None, description="Correlation ID for tracing")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Policy Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class SafetyPolicy(BaseModel):
    """Full set of safety settings grouped under one named policy.

    Only ``name`` is required; every other field has a default. List-valued
    fields build a fresh list for each instance.
    """

    # Identity
    name: str = Field(..., description="Policy name")
    description: str | None = Field(default=None, description="Policy description")
    version: str = Field(default="1.0.0", description="Policy version")
    enabled: bool = Field(default=True, description="Whether policy is enabled")

    # Cost controls
    max_tokens_per_session: int = Field(
        default=100_000, description="Max tokens per session"
    )
    max_tokens_per_day: int = Field(
        default=1_000_000, description="Max tokens per day"
    )
    max_cost_per_session_usd: float = Field(
        default=10.0, description="Max USD per session"
    )
    max_cost_per_day_usd: float = Field(default=100.0, description="Max USD per day")

    # Rate limits
    max_actions_per_minute: int = Field(
        default=60, description="Max actions per minute"
    )
    max_llm_calls_per_minute: int = Field(
        default=20, description="Max LLM calls per minute"
    )
    max_file_operations_per_minute: int = Field(
        default=100, description="Max file ops per minute"
    )

    # Permissions
    allowed_tools: list[str] = Field(
        default_factory=lambda: ["*"], description="Allowed tool patterns"
    )
    denied_tools: list[str] = Field(
        default_factory=list, description="Denied tool patterns"
    )
    allowed_file_patterns: list[str] = Field(
        default_factory=lambda: ["**/*"], description="Allowed file patterns"
    )
    denied_file_patterns: list[str] = Field(
        default_factory=lambda: ["**/.env", "**/secrets/**"],
        description="Denied file patterns",
    )

    # Human-in-the-loop (HITL)
    require_approval_for: list[str] = Field(
        default_factory=lambda: [
            "delete_file",
            "push_to_remote",
            "deploy_to_production",
            "modify_critical_config",
        ],
        description="Actions requiring approval",
    )

    # Loop detection
    max_repeated_actions: int = Field(default=5, description="Max exact repetitions")
    max_similar_actions: int = Field(default=10, description="Max similar actions")

    # Sandbox
    require_sandbox: bool = Field(
        default=False, description="Require sandbox execution"
    )
    sandbox_timeout_seconds: int = Field(default=300, description="Sandbox timeout")
    sandbox_memory_mb: int = Field(default=1024, description="Sandbox memory limit")

    # Custom validation rules
    validation_rules: list[ValidationRule] = Field(
        default_factory=list, description="Custom validation rules"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Guardian Result Models
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class GuardianResult(BaseModel):
    """Outcome of a SafetyGuardian evaluation of one action.

    ``action_id``, ``allowed`` and ``decision`` are required. Optional
    follow-up fields default to ``None`` and list fields to fresh empty lists.
    """

    # Verdict
    action_id: str = Field(..., description="ID of the action")
    allowed: bool = Field(..., description="Whether action is allowed")
    decision: SafetyDecision = Field(..., description="Safety decision")
    reasons: list[str] = Field(default_factory=list, description="Decision reasons")

    # Follow-up references
    approval_id: str | None = Field(default=None, description="Approval ID if needed")
    checkpoint_id: str | None = Field(
        default=None, description="Checkpoint ID if created"
    )
    retry_after_seconds: float | None = Field(default=None, description="Retry delay")
    modified_action: ActionRequest | None = Field(
        default=None, description="Modified action if changed"
    )

    # Audit trail
    audit_events: list[AuditEvent] = Field(
        default_factory=list, description="Generated audit events"
    )
|
||||
Reference in New Issue
Block a user