# app/api/routes/agents.py
"""
Agent Instance management endpoints for Syndarix projects.

These endpoints allow project owners and superusers to manage AI agent instances
within their projects, including spawning, pausing, resuming, and terminating agents.
"""

import logging
import os
from typing import Any
from uuid import UUID

from fastapi import APIRouter, Depends, Query, Request, status
from slowapi import Limiter
from slowapi.util import get_remote_address
from sqlalchemy.ext.asyncio import AsyncSession

from app.api.dependencies.auth import get_current_user
from app.core.database import get_db
from app.core.exceptions import (
    AuthorizationError,
    NotFoundError,
    ValidationException,
)
from app.crud.syndarix.agent_instance import agent_instance as agent_instance_crud
from app.crud.syndarix.agent_type import agent_type as agent_type_crud
from app.crud.syndarix.project import project as project_crud
from app.models.syndarix import AgentInstance, Project
from app.models.syndarix.enums import AgentStatus
from app.models.user import User
from app.schemas.common import (
    MessageResponse,
    PaginatedResponse,
    PaginationParams,
    create_pagination_meta,
)
from app.schemas.errors import ErrorCode
from app.schemas.syndarix.agent_instance import (
    AgentInstanceCreate,
    AgentInstanceMetrics,
    AgentInstanceResponse,
    AgentInstanceUpdate,
)

router = APIRouter()
logger = logging.getLogger(__name__)

# Initialize limiter for this router
limiter = Limiter(key_func=get_remote_address)

# Use higher rate limits in test environment.
# Only the exact string "True" enables test mode; any other value leaves
# the multiplier at 1.
IS_TEST = os.getenv("IS_TEST", "False") == "True"
RATE_MULTIPLIER = 100 if IS_TEST else 1


# Valid status transitions for agent lifecycle management.
# Maps each current status to the set of statuses it may move to.
VALID_STATUS_TRANSITIONS: dict[AgentStatus, set[AgentStatus]] = {
    AgentStatus.IDLE: {AgentStatus.WORKING, AgentStatus.PAUSED, AgentStatus.TERMINATED},
    AgentStatus.WORKING: {
        AgentStatus.IDLE,
        AgentStatus.WAITING,
        AgentStatus.PAUSED,
        AgentStatus.TERMINATED,
    },
    AgentStatus.WAITING: {
        AgentStatus.IDLE,
        AgentStatus.WORKING,
        AgentStatus.PAUSED,
        AgentStatus.TERMINATED,
    },
    AgentStatus.PAUSED: {AgentStatus.IDLE, AgentStatus.TERMINATED},
    AgentStatus.TERMINATED: set(),  # Terminal state, no transitions allowed
}


async def verify_project_access(
    db: AsyncSession,
    project_id: UUID,
    user: User,
) -> Project:
    """
    Verify user has access to a project.

    Access is granted to superusers and to the user whose id equals the
    project's ``owner_id``.

    Args:
        db: Database session
        project_id: UUID of the project to verify
        user: Current authenticated user

    Returns:
        Project: The project if access is granted

    Raises:
        NotFoundError: If the project does not exist
        AuthorizationError: If the user does not have access to the project
    """
    project = await project_crud.get(db, id=project_id)
    if not project:
        raise NotFoundError(
            message=f"Project {project_id} not found",
            error_code=ErrorCode.NOT_FOUND,
        )

    if not user.is_superuser and project.owner_id != user.id:
        raise AuthorizationError(
            message="You do not have access to this project",
            error_code=ErrorCode.INSUFFICIENT_PERMISSIONS,
        )

    return project


def validate_status_transition(
    current_status: AgentStatus,
    target_status: AgentStatus,
) -> None:
    """
    Validate that a status transition is allowed.

    The allowed targets are looked up in ``VALID_STATUS_TRANSITIONS``; a
    current status missing from that table is treated as having no allowed
    targets.

    Args:
        current_status: The agent's current status
        target_status: The desired target status

    Raises:
        ValidationException: If the transition is not allowed
    """
    valid_targets = VALID_STATUS_TRANSITIONS.get(current_status, set())
    if target_status not in valid_targets:
        raise ValidationException(
            message=f"Cannot transition from {current_status.value} to {target_status.value}",
            error_code=ErrorCode.VALIDATION_ERROR,
            field="status",
        )


def build_agent_response(
    agent: AgentInstance,
    agent_type_name: str | None = None,
    agent_type_slug: str | None = None,
    project_name: str | None = None,
    project_slug: str | None = None,
    assigned_issues_count: int = 0,
) -> AgentInstanceResponse:
    """
    Build an AgentInstanceResponse from an AgentInstance model.

    Args:
        agent: The agent instance model
        agent_type_name: Name of the agent type
        agent_type_slug: Slug of the agent type
        project_name: Name of the project
        project_slug: Slug of the project
        assigned_issues_count: Number of issues assigned to this agent

    Returns:
        AgentInstanceResponse: The response schema
    """
    return AgentInstanceResponse(
        id=agent.id,
        agent_type_id=agent.agent_type_id,
        project_id=agent.project_id,
        name=agent.name,
        status=agent.status,
        current_task=agent.current_task,
        # A falsy short-term memory (e.g. None) is reported as an empty dict.
        short_term_memory=agent.short_term_memory or {},
        long_term_memory_ref=agent.long_term_memory_ref,
        session_id=agent.session_id,
        last_activity_at=agent.last_activity_at,
        terminated_at=agent.terminated_at,
        tasks_completed=agent.tasks_completed,
        tokens_used=agent.tokens_used,
        cost_incurred=agent.cost_incurred,
        created_at=agent.created_at,
        updated_at=agent.updated_at,
        agent_type_name=agent_type_name,
        agent_type_slug=agent_type_slug,
        project_name=project_name,
        project_slug=project_slug,
        assigned_issues_count=assigned_issues_count,
    )


# ===== Private helpers shared by the endpoints below =====


def _response_from_details(details: dict[str, Any]) -> AgentInstanceResponse:
    """
    Build a response from the result of ``agent_instance_crud.get_with_details``.

    Only the keys read here are relied on: ``"instance"`` (required) plus the
    optional ``agent_type_name``, ``agent_type_slug``, ``project_name``,
    ``project_slug`` and ``assigned_issues_count`` (defaulting to 0).
    NOTE(review): the value is presumably a plain dict; confirm against the
    CRUD layer.
    """
    return build_agent_response(
        agent=details["instance"],
        agent_type_name=details.get("agent_type_name"),
        agent_type_slug=details.get("agent_type_slug"),
        project_name=details.get("project_name"),
        project_slug=details.get("project_slug"),
        assigned_issues_count=details.get("assigned_issues_count", 0),
    )


async def _load_agent_response(
    db: AsyncSession,
    agent: AgentInstance,
) -> AgentInstanceResponse:
    """
    Build the enriched response for ``agent``, falling back to the bare model.

    Looks the agent up again via ``get_with_details``; when that returns a
    falsy value the response is built from ``agent`` alone, leaving the
    related-entity fields at their defaults.
    """
    details = await agent_instance_crud.get_with_details(db, instance_id=agent.id)
    if details:
        return _response_from_details(details)
    return build_agent_response(agent)


async def _get_agent_in_project(
    db: AsyncSession,
    project_id: UUID,
    agent_id: UUID,
) -> AgentInstance:
    """
    Fetch an agent and ensure it belongs to ``project_id``.

    Raises:
        NotFoundError: If the agent does not exist, or exists but belongs to a
            different project (reported as "not found in project").
    """
    agent = await agent_instance_crud.get(db, id=agent_id)
    if not agent:
        raise NotFoundError(
            message=f"Agent {agent_id} not found",
            error_code=ErrorCode.NOT_FOUND,
        )

    if agent.project_id != project_id:
        raise NotFoundError(
            message=f"Agent {agent_id} not found in project {project_id}",
            error_code=ErrorCode.NOT_FOUND,
        )

    return agent


# ===== Agent Instance Management Endpoints =====


@router.post(
    "/projects/{project_id}/agents",
    response_model=AgentInstanceResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Spawn Agent Instance",
    description="Spawn a new agent instance in a project. Requires project ownership or superuser.",
    operation_id="spawn_agent",
)
@limiter.limit(f"{20 * RATE_MULTIPLIER}/minute")
async def spawn_agent(
    request: Request,
    project_id: UUID,
    agent_in: AgentInstanceCreate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Spawn a new agent instance in a project.

    Creates a new agent instance from an agent type template and assigns it
    to the specified project. The agent starts in IDLE status by default.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project to spawn the agent in
        agent_in: Agent instance creation data
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceResponse: The newly created agent instance

    Raises:
        NotFoundError: If the project or the agent type is not found
        AuthorizationError: If the user lacks access to the project
        ValidationException: If the agent creation data is invalid (body
            project_id differs from the URL, the agent type is inactive, or
            the CRUD layer raises ValueError)
    """
    try:
        # Verify project access
        project = await verify_project_access(db, project_id, current_user)

        # Ensure the agent is being created for the correct project
        if agent_in.project_id != project_id:
            raise ValidationException(
                message="Agent project_id must match the URL project_id",
                error_code=ErrorCode.VALIDATION_ERROR,
                field="project_id",
            )

        # Validate that the agent type exists and is active
        agent_type = await agent_type_crud.get(db, id=agent_in.agent_type_id)
        if not agent_type:
            raise NotFoundError(
                message=f"Agent type {agent_in.agent_type_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        if not agent_type.is_active:
            raise ValidationException(
                message=f"Agent type '{agent_type.name}' is inactive and cannot be used",
                error_code=ErrorCode.VALIDATION_ERROR,
                field="agent_type_id",
            )

        # Create the agent instance
        agent = await agent_instance_crud.create(db, obj_in=agent_in)

        logger.info(
            f"User {current_user.email} spawned agent '{agent.name}' "
            f"(id={agent.id}) in project {project.slug}"
        )

        # Get agent details for response
        return await _load_agent_response(db, agent)

    except (NotFoundError, AuthorizationError, ValidationException):
        raise
    except ValueError as e:
        logger.warning(f"Failed to spawn agent: {e!s}")
        # Chain the original error so the root cause stays in the traceback.
        raise ValidationException(
            message=str(e),
            error_code=ErrorCode.VALIDATION_ERROR,
        ) from e
    except Exception as e:
        logger.error(f"Error spawning agent: {e!s}", exc_info=True)
        raise


@router.get(
    "/projects/{project_id}/agents",
    response_model=PaginatedResponse[AgentInstanceResponse],
    summary="List Project Agents",
    description="List all agent instances in a project with optional filtering.",
    operation_id="list_project_agents",
)
@limiter.limit(f"{60 * RATE_MULTIPLIER}/minute")
async def list_project_agents(
    request: Request,
    project_id: UUID,
    pagination: PaginationParams = Depends(),
    status_filter: AgentStatus | None = Query(
        None, alias="status", description="Filter by agent status"
    ),
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    List all agent instances in a project.

    Returns a paginated list of agents with optional status filtering.
    Results are ordered by creation date (newest first).

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        pagination: Pagination parameters
        status_filter: Optional filter by agent status
        current_user: Current authenticated user
        db: Database session

    Returns:
        PaginatedResponse[AgentInstanceResponse]: Paginated list of agents

    Raises:
        NotFoundError: If the project is not found
        AuthorizationError: If the user lacks access to the project
    """
    try:
        # Verify project access
        project = await verify_project_access(db, project_id, current_user)

        # Get agents for the project
        agents, total = await agent_instance_crud.get_by_project(
            db,
            project_id=project_id,
            status=status_filter,
            skip=pagination.offset,
            limit=pagination.limit,
        )

        # Build response objects.
        # Details are fetched one agent at a time, in order (could be
        # optimized with a bulk query).
        agent_responses = [await _load_agent_response(db, agent) for agent in agents]

        pagination_meta = create_pagination_meta(
            total=total,
            page=pagination.page,
            limit=pagination.limit,
            items_count=len(agent_responses),
        )

        logger.debug(
            f"User {current_user.email} listed {len(agent_responses)} agents "
            f"in project {project.slug}"
        )

        return PaginatedResponse(data=agent_responses, pagination=pagination_meta)

    except (NotFoundError, AuthorizationError):
        raise
    except Exception as e:
        logger.error(f"Error listing project agents: {e!s}", exc_info=True)
        raise


# ===== Project Agent Metrics Endpoint =====
# NOTE: This endpoint MUST be defined before /{agent_id} routes
# to prevent FastAPI from trying to parse "metrics" as a UUID


@router.get(
    "/projects/{project_id}/agents/metrics",
    response_model=AgentInstanceMetrics,
    summary="Get Project Agent Metrics",
    description="Get aggregated usage metrics for all agents in a project.",
    operation_id="get_project_agent_metrics",
)
@limiter.limit(f"{60 * RATE_MULTIPLIER}/minute")
async def get_project_agent_metrics(
    request: Request,
    project_id: UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Get aggregated usage metrics for all agents in a project.

    Returns aggregated metrics across all agents including total
    tasks completed, tokens used, and cost incurred.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceMetrics: Aggregated project agent metrics

    Raises:
        NotFoundError: If the project is not found
        AuthorizationError: If the user lacks access to the project
    """
    try:
        # Verify project access
        project = await verify_project_access(db, project_id, current_user)

        # Get aggregated metrics for the project
        metrics = await agent_instance_crud.get_project_metrics(
            db, project_id=project_id
        )

        logger.debug(
            f"User {current_user.email} retrieved project metrics for {project.slug}"
        )

        return AgentInstanceMetrics(
            total_instances=metrics["total_instances"],
            active_instances=metrics["active_instances"],
            idle_instances=metrics["idle_instances"],
            total_tasks_completed=metrics["total_tasks_completed"],
            total_tokens_used=metrics["total_tokens_used"],
            total_cost_incurred=metrics["total_cost_incurred"],
        )

    except (NotFoundError, AuthorizationError):
        raise
    except Exception as e:
        logger.error(f"Error getting project agent metrics: {e!s}", exc_info=True)
        raise


@router.get(
    "/projects/{project_id}/agents/{agent_id}",
    response_model=AgentInstanceResponse,
    summary="Get Agent Details",
    description="Get detailed information about a specific agent instance.",
    operation_id="get_agent",
)
@limiter.limit(f"{60 * RATE_MULTIPLIER}/minute")
async def get_agent(
    request: Request,
    project_id: UUID,
    agent_id: UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Get detailed information about a specific agent instance.

    Returns full agent details including related entity information
    (agent type name, project name) and assigned issues count.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        agent_id: UUID of the agent instance
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceResponse: The agent instance details

    Raises:
        NotFoundError: If the project or agent is not found
        AuthorizationError: If the user lacks access to the project
    """
    try:
        # Verify project access
        await verify_project_access(db, project_id, current_user)

        # Get agent with full details
        details = await agent_instance_crud.get_with_details(db, instance_id=agent_id)

        if not details:
            raise NotFoundError(
                message=f"Agent {agent_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        agent = details["instance"]

        # Verify agent belongs to the specified project
        if agent.project_id != project_id:
            raise NotFoundError(
                message=f"Agent {agent_id} not found in project {project_id}",
                error_code=ErrorCode.NOT_FOUND,
            )

        logger.debug(
            f"User {current_user.email} retrieved agent {agent.name} (id={agent_id})"
        )

        return _response_from_details(details)

    except (NotFoundError, AuthorizationError):
        raise
    except Exception as e:
        logger.error(f"Error getting agent details: {e!s}", exc_info=True)
        raise


@router.patch(
    "/projects/{project_id}/agents/{agent_id}",
    response_model=AgentInstanceResponse,
    summary="Update Agent",
    description="Update an agent instance's configuration and state.",
    operation_id="update_agent",
)
@limiter.limit(f"{30 * RATE_MULTIPLIER}/minute")
async def update_agent(
    request: Request,
    project_id: UUID,
    agent_id: UUID,
    agent_in: AgentInstanceUpdate,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Update an agent instance's configuration and state.

    Allows updating agent status, current task, memory, and other
    configurable fields. Status transitions are validated according
    to the agent lifecycle state machine.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        agent_id: UUID of the agent instance
        agent_in: Agent update data
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceResponse: The updated agent instance

    Raises:
        NotFoundError: If the project or agent is not found
        AuthorizationError: If the user lacks access to the project
        ValidationException: If the status transition is invalid
    """
    try:
        # Verify project access
        await verify_project_access(db, project_id, current_user)

        # Get current agent and verify it belongs to the specified project
        agent = await _get_agent_in_project(db, project_id, agent_id)

        # Validate status transition if status is being changed
        if agent_in.status is not None and agent_in.status != agent.status:
            validate_status_transition(agent.status, agent_in.status)

        # Update the agent
        updated_agent = await agent_instance_crud.update(
            db, db_obj=agent, obj_in=agent_in
        )

        logger.info(
            f"User {current_user.email} updated agent {updated_agent.name} "
            f"(id={agent_id})"
        )

        # Get updated details
        return await _load_agent_response(db, updated_agent)

    except (NotFoundError, AuthorizationError, ValidationException):
        raise
    except Exception as e:
        logger.error(f"Error updating agent: {e!s}", exc_info=True)
        raise


@router.post(
    "/projects/{project_id}/agents/{agent_id}/pause",
    response_model=AgentInstanceResponse,
    summary="Pause Agent",
    description="Pause an agent instance, temporarily stopping its work.",
    operation_id="pause_agent",
)
@limiter.limit(f"{20 * RATE_MULTIPLIER}/minute")
async def pause_agent(
    request: Request,
    project_id: UUID,
    agent_id: UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Pause an agent instance.

    Transitions the agent to PAUSED status, temporarily stopping
    its work. The agent can be resumed later with the resume endpoint.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        agent_id: UUID of the agent instance
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceResponse: The paused agent instance

    Raises:
        NotFoundError: If the project or agent is not found
        AuthorizationError: If the user lacks access to the project
        ValidationException: If the agent cannot be paused from its current state
    """
    try:
        # Verify project access
        await verify_project_access(db, project_id, current_user)

        # Get current agent and verify it belongs to the specified project
        agent = await _get_agent_in_project(db, project_id, agent_id)

        # Validate the transition to PAUSED
        validate_status_transition(agent.status, AgentStatus.PAUSED)

        # Update status to PAUSED
        paused_agent = await agent_instance_crud.update_status(
            db,
            instance_id=agent_id,
            status=AgentStatus.PAUSED,
        )

        # update_status may return a falsy value; treat that as "not found".
        if not paused_agent:
            raise NotFoundError(
                message=f"Agent {agent_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        logger.info(
            f"User {current_user.email} paused agent {paused_agent.name} "
            f"(id={agent_id})"
        )

        # Get updated details
        return await _load_agent_response(db, paused_agent)

    except (NotFoundError, AuthorizationError, ValidationException):
        raise
    except Exception as e:
        logger.error(f"Error pausing agent: {e!s}", exc_info=True)
        raise


@router.post(
    "/projects/{project_id}/agents/{agent_id}/resume",
    response_model=AgentInstanceResponse,
    summary="Resume Agent",
    description="Resume a paused agent instance.",
    operation_id="resume_agent",
)
@limiter.limit(f"{20 * RATE_MULTIPLIER}/minute")
async def resume_agent(
    request: Request,
    project_id: UUID,
    agent_id: UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Resume a paused agent instance.

    Transitions the agent from PAUSED back to IDLE status,
    allowing it to accept new work.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        agent_id: UUID of the agent instance
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceResponse: The resumed agent instance

    Raises:
        NotFoundError: If the project or agent is not found
        AuthorizationError: If the user lacks access to the project
        ValidationException: If the agent cannot be resumed from its current state
    """
    try:
        # Verify project access
        await verify_project_access(db, project_id, current_user)

        # Get current agent and verify it belongs to the specified project
        agent = await _get_agent_in_project(db, project_id, agent_id)

        # Validate the transition to IDLE (resume)
        validate_status_transition(agent.status, AgentStatus.IDLE)

        # Update status to IDLE
        resumed_agent = await agent_instance_crud.update_status(
            db,
            instance_id=agent_id,
            status=AgentStatus.IDLE,
        )

        # update_status may return a falsy value; treat that as "not found".
        if not resumed_agent:
            raise NotFoundError(
                message=f"Agent {agent_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        logger.info(
            f"User {current_user.email} resumed agent {resumed_agent.name} "
            f"(id={agent_id})"
        )

        # Get updated details
        return await _load_agent_response(db, resumed_agent)

    except (NotFoundError, AuthorizationError, ValidationException):
        raise
    except Exception as e:
        logger.error(f"Error resuming agent: {e!s}", exc_info=True)
        raise
@router.delete(
    "/projects/{project_id}/agents/{agent_id}",
    response_model=MessageResponse,
    summary="Terminate Agent",
    description="Terminate an agent instance, permanently stopping it.",
    operation_id="terminate_agent",
)
@limiter.limit(f"{10 * RATE_MULTIPLIER}/minute")
async def terminate_agent(
    request: Request,
    project_id: UUID,
    agent_id: UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Permanently stop an agent instance.

    After the caller's access to the project is verified, the agent is
    looked up, checked against the project, and handed to the CRUD layer's
    ``terminate`` operation. An agent that is already TERMINATED is rejected
    with a validation error rather than terminated again.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        agent_id: UUID of the agent instance
        current_user: Current authenticated user
        db: Database session

    Returns:
        MessageResponse: Confirmation message naming the terminated agent

    Raises:
        NotFoundError: If the project or agent is not found
        AuthorizationError: If the user lacks access to the project
        ValidationException: If the agent is already terminated
    """
    try:
        await verify_project_access(db, project_id, current_user)

        existing = await agent_instance_crud.get(db, id=agent_id)
        if not existing:
            raise NotFoundError(
                message=f"Agent {agent_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        # An agent from another project is reported as missing.
        if existing.project_id != project_id:
            raise NotFoundError(
                message=f"Agent {agent_id} not found in project {project_id}",
                error_code=ErrorCode.NOT_FOUND,
            )

        # Dedicated message for the already-terminated case, ahead of the
        # generic state-machine check.
        if existing.status == AgentStatus.TERMINATED:
            raise ValidationException(
                message="Agent is already terminated",
                error_code=ErrorCode.VALIDATION_ERROR,
                field="status",
            )

        validate_status_transition(existing.status, AgentStatus.TERMINATED)

        # Capture the name before terminating; it is used in the log line
        # and the confirmation message.
        agent_name = existing.name

        outcome = await agent_instance_crud.terminate(db, instance_id=agent_id)
        if not outcome:
            raise NotFoundError(
                message=f"Agent {agent_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        logger.info(
            f"User {current_user.email} terminated agent {agent_name} (id={agent_id})"
        )

        return MessageResponse(
            success=True,
            message=f"Agent '{agent_name}' has been terminated",
        )

    except Exception as e:
        # Domain errors pass through untouched; anything else is logged first.
        if not isinstance(
            e, (NotFoundError, AuthorizationError, ValidationException)
        ):
            logger.error(f"Error terminating agent: {e!s}", exc_info=True)
        raise


@router.get(
    "/projects/{project_id}/agents/{agent_id}/metrics",
    response_model=AgentInstanceMetrics,
    summary="Get Agent Metrics",
    description="Get usage metrics for a specific agent instance.",
    operation_id="get_agent_metrics",
)
@limiter.limit(f"{60 * RATE_MULTIPLIER}/minute")
async def get_agent_metrics(
    request: Request,
    project_id: UUID,
    agent_id: UUID,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
) -> Any:
    """
    Report usage metrics for one agent instance.

    The result reuses the aggregate ``AgentInstanceMetrics`` schema with
    ``total_instances`` fixed at 1. The agent counts as active only when its
    status is WORKING and as idle only when its status is IDLE; the totals
    are the agent's own tasks, tokens and cost.

    Args:
        request: FastAPI request object (for rate limiting)
        project_id: UUID of the project
        agent_id: UUID of the agent instance
        current_user: Current authenticated user
        db: Database session

    Returns:
        AgentInstanceMetrics: Agent usage metrics

    Raises:
        NotFoundError: If the project or agent is not found
        AuthorizationError: If the user lacks access to the project
    """
    try:
        await verify_project_access(db, project_id, current_user)

        agent = await agent_instance_crud.get(db, id=agent_id)
        if not agent:
            raise NotFoundError(
                message=f"Agent {agent_id} not found",
                error_code=ErrorCode.NOT_FOUND,
            )

        # An agent from another project is reported as missing.
        if agent.project_id != project_id:
            raise NotFoundError(
                message=f"Agent {agent_id} not found in project {project_id}",
                error_code=ErrorCode.NOT_FOUND,
            )

        # Single-agent view: each counter is either 0 or 1.
        working_count = 1 if agent.status == AgentStatus.WORKING else 0
        idle_count = 1 if agent.status == AgentStatus.IDLE else 0

        logger.debug(
            f"User {current_user.email} retrieved metrics for agent {agent.name} "
            f"(id={agent_id})"
        )

        return AgentInstanceMetrics(
            total_instances=1,
            active_instances=working_count,
            idle_instances=idle_count,
            total_tasks_completed=agent.tasks_completed,
            total_tokens_used=agent.tokens_used,
            total_cost_incurred=agent.cost_incurred,
        )

    except Exception as e:
        # Domain errors pass through untouched; anything else is logged first.
        if not isinstance(e, (NotFoundError, AuthorizationError)):
            logger.error(f"Error getting agent metrics: {e!s}", exc_info=True)
        raise