diff --git a/backend/app/crud/syndarix/agent_instance.py b/backend/app/crud/syndarix/agent_instance.py index 2965fd7..a7c6ce9 100644 --- a/backend/app/crud/syndarix/agent_instance.py +++ b/backend/app/crud/syndarix/agent_instance.py @@ -249,23 +249,35 @@ class CRUDAgentInstance(CRUDBase[AgentInstance, AgentInstanceCreate, AgentInstan tokens_used: int, cost_incurred: Decimal, ) -> AgentInstance | None: - """Record a completed task and update metrics.""" + """Record a completed task and update metrics. + + Uses atomic SQL UPDATE to prevent lost updates under concurrent load. + This avoids the read-modify-write race condition that occurs when + multiple task completions happen simultaneously. + """ try: + now = datetime.now(UTC) + + # Use atomic SQL UPDATE to increment counters without race conditions + # This is safe for concurrent updates - no read-modify-write pattern result = await db.execute( - select(AgentInstance).where(AgentInstance.id == instance_id) + update(AgentInstance) + .where(AgentInstance.id == instance_id) + .values( + tasks_completed=AgentInstance.tasks_completed + 1, + tokens_used=AgentInstance.tokens_used + tokens_used, + cost_incurred=AgentInstance.cost_incurred + cost_incurred, + last_activity_at=now, + updated_at=now, + ) + .returning(AgentInstance) ) instance = result.scalar_one_or_none() if not instance: return None - instance.tasks_completed += 1 - instance.tokens_used += tokens_used - instance.cost_incurred += cost_incurred - instance.last_activity_at = datetime.now(UTC) - await db.commit() - await db.refresh(instance) return instance except Exception as e: await db.rollback() diff --git a/backend/app/crud/syndarix/sprint.py b/backend/app/crud/syndarix/sprint.py index 16f0acd..9f459ea 100644 --- a/backend/app/crud/syndarix/sprint.py +++ b/backend/app/crud/syndarix/sprint.py @@ -249,9 +249,18 @@ class CRUDSprint(CRUDBase[Sprint, SprintCreate, SprintUpdate]): *, sprint_id: UUID, ) -> Sprint | None: - """Complete an active sprint and calculate completed points.""" + """Complete an active sprint and calculate completed points. + + Uses row-level locking (SELECT FOR UPDATE) to prevent race conditions + when velocity is being calculated and other operations might modify issues. + """ try: - result = await db.execute(select(Sprint).where(Sprint.id == sprint_id)) + # Lock the sprint row to prevent concurrent modifications + result = await db.execute( + select(Sprint) + .where(Sprint.id == sprint_id) + .with_for_update() + ) sprint = result.scalar_one_or_none() if not sprint: @@ -265,6 +274,8 @@ class CRUDSprint(CRUDBase[Sprint, SprintCreate, SprintUpdate]): sprint.status = SprintStatus.COMPLETED # Calculate velocity (completed points) from closed issues + # Note: Issues are not locked, but sprint lock ensures this sprint's + # completion is atomic and prevents concurrent completion attempts points_result = await db.execute( select(func.sum(Issue.story_points)).where( Issue.sprint_id == sprint_id, @@ -289,9 +300,18 @@ class CRUDSprint(CRUDBase[Sprint, SprintCreate, SprintUpdate]): *, sprint_id: UUID, ) -> Sprint | None: - """Cancel a sprint (only PLANNED or ACTIVE sprints can be cancelled).""" + """Cancel a sprint (only PLANNED or ACTIVE sprints can be cancelled). + + Uses row-level locking to prevent race conditions with concurrent + sprint status modifications. + """ try: - result = await db.execute(select(Sprint).where(Sprint.id == sprint_id)) + # Lock the sprint row to prevent concurrent modifications + result = await db.execute( + select(Sprint) + .where(Sprint.id == sprint_id) + .with_for_update() + ) sprint = result.scalar_one_or_none() if not sprint: