From 7280b182bd0094d73d46338a9b33333167e9d11f Mon Sep 17 00:00:00 2001 From: Felipe Cardoso Date: Wed, 31 Dec 2025 17:23:33 +0100 Subject: [PATCH] fix(backend): race condition fixes for task completion and sprint operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes ### agent_instance.py - Task Completion Counter Race Condition - Changed `record_task_completion()` from read-modify-write pattern to atomic SQL UPDATE - Previously: Read instance → increment in Python memory → write back - Now: Uses `UPDATE ... SET tasks_completed = tasks_completed + 1` - Prevents lost updates when multiple concurrent task completions occur ### sprint.py - Row-Level Locking for Sprint Operations - Added `with_for_update()` to `complete_sprint()` to prevent race conditions during velocity calculation - Added `with_for_update()` to `cancel_sprint()` for consistency - Ensures atomic check-and-update for sprint status changes ## Impact These fixes prevent: - Counter metrics being lost under concurrent load - Data corruption during sprint completion - Race conditions with concurrent sprint status changes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/app/crud/syndarix/agent_instance.py | 28 +++++++++++++++------ backend/app/crud/syndarix/sprint.py | 28 ++++++++++++++++++--- 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/backend/app/crud/syndarix/agent_instance.py b/backend/app/crud/syndarix/agent_instance.py index 2965fd7..a7c6ce9 100644 --- a/backend/app/crud/syndarix/agent_instance.py +++ b/backend/app/crud/syndarix/agent_instance.py @@ -249,23 +249,35 @@ class CRUDAgentInstance(CRUDBase[AgentInstance, AgentInstanceCreate, AgentInstan tokens_used: int, cost_incurred: Decimal, ) -> AgentInstance | None: - """Record a completed task and update metrics.""" + """Record a completed task and update metrics. + + Uses atomic SQL UPDATE to prevent lost updates under concurrent load. + This avoids the read-modify-write race condition that occurs when + multiple task completions happen simultaneously. + """ try: + now = datetime.now(UTC) + + # Use atomic SQL UPDATE to increment counters without race conditions + # This is safe for concurrent updates - no read-modify-write pattern result = await db.execute( - select(AgentInstance).where(AgentInstance.id == instance_id) + update(AgentInstance) + .where(AgentInstance.id == instance_id) + .values( + tasks_completed=AgentInstance.tasks_completed + 1, + tokens_used=AgentInstance.tokens_used + tokens_used, + cost_incurred=AgentInstance.cost_incurred + cost_incurred, + last_activity_at=now, + updated_at=now, + ) + .returning(AgentInstance) ) instance = result.scalar_one_or_none() if not instance: return None - instance.tasks_completed += 1 - instance.tokens_used += tokens_used - instance.cost_incurred += cost_incurred - instance.last_activity_at = datetime.now(UTC) - await db.commit() - await db.refresh(instance) return instance except Exception as e: await db.rollback() diff --git a/backend/app/crud/syndarix/sprint.py b/backend/app/crud/syndarix/sprint.py index 16f0acd..9f459ea 100644 --- a/backend/app/crud/syndarix/sprint.py +++ b/backend/app/crud/syndarix/sprint.py @@ -249,9 +249,18 @@ class CRUDSprint(CRUDBase[Sprint, SprintCreate, SprintUpdate]): *, sprint_id: UUID, ) -> Sprint | None: - """Complete an active sprint and calculate completed points.""" + """Complete an active sprint and calculate completed points. + + Uses row-level locking (SELECT FOR UPDATE) to prevent race conditions + when velocity is being calculated and other operations might modify issues. + """ try: - result = await db.execute(select(Sprint).where(Sprint.id == sprint_id)) + # Lock the sprint row to prevent concurrent modifications + result = await db.execute( + select(Sprint) + .where(Sprint.id == sprint_id) + .with_for_update() + ) sprint = result.scalar_one_or_none() if not sprint: @@ -265,6 +274,8 @@ class CRUDSprint(CRUDBase[Sprint, SprintCreate, SprintUpdate]): sprint.status = SprintStatus.COMPLETED # Calculate velocity (completed points) from closed issues + # Note: Issues are not locked, but sprint lock ensures this sprint's + # completion is atomic and prevents concurrent completion attempts points_result = await db.execute( select(func.sum(Issue.story_points)).where( Issue.sprint_id == sprint_id, @@ -289,9 +300,18 @@ class CRUDSprint(CRUDBase[Sprint, SprintCreate, SprintUpdate]): *, sprint_id: UUID, ) -> Sprint | None: - """Cancel a sprint (only PLANNED or ACTIVE sprints can be cancelled).""" + """Cancel a sprint (only PLANNED or ACTIVE sprints can be cancelled). + + Uses row-level locking to prevent race conditions with concurrent + sprint status modifications. + """ try: - result = await db.execute(select(Sprint).where(Sprint.id == sprint_id)) + # Lock the sprint row to prevent concurrent modifications + result = await db.execute( + select(Sprint) + .where(Sprint.id == sprint_id) + .with_for_update() + ) sprint = result.scalar_one_or_none() if not sprint: