From fb1d799b82e237e61ec908f9a305de97ac55d6e7 Mon Sep 17 00:00:00 2001 From: mika kuns Date: Tue, 9 Jun 2026 23:35:08 +0200 Subject: [PATCH] fix(worker): stateless AbortPlanningMerge after worker restart mid-merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PlanningMergeOrchestrator._states is in-memory. A worker restart during a conflict pause left the list repo mid-merge with no recovery path: both ContinuePlanningMerge and AbortPlanningMerge threw "no in-progress merge", and re-Approving failed on the IsMidMergeAsync guard. AbortAsync now falls through to a stateless path when no _states entry exists (or the entry has no current subtask): it looks up the parent's list WorkingDir and, if the repo is mid-merge, runs git merge --abort there directly, then broadcasts PlanningMergeAborted. Parent remains WaitingForReview — the next Approve restarts the unit merge (already-Merged child worktrees are skipped as before). ContinueAsync error message now points to AbortPlanningMerge as the recovery action. StartAsync mid-merge guard also carries an actionable hint. Tests: AbortAsync stateless + mid-merge (restart recovery), AbortAsync stateless + clean repo (clear error). 
Co-Authored-By: Claude Sonnet 4.6 --- .../Planning/PlanningMergeOrchestrator.cs | 35 ++++++++++-- .../PlanningMergeOrchestratorTests.cs | 53 +++++++++++++++++++ 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/src/ClaudeDo.Worker/Planning/PlanningMergeOrchestrator.cs b/src/ClaudeDo.Worker/Planning/PlanningMergeOrchestrator.cs index 3b4b6bb..0e2aec3 100644 --- a/src/ClaudeDo.Worker/Planning/PlanningMergeOrchestrator.cs +++ b/src/ClaudeDo.Worker/Planning/PlanningMergeOrchestrator.cs @@ -85,7 +85,8 @@ public sealed class PlanningMergeOrchestrator } if (await _git.IsMidMergeAsync(workingDir, ct)) - throw new InvalidOperationException("repo is mid-merge"); + throw new InvalidOperationException( + "repo is mid-merge; use AbortPlanningMerge to reset the repository, then Approve again"); if (await _git.HasChangesAsync(workingDir, ct)) throw new InvalidOperationException("working tree has uncommitted changes"); @@ -110,7 +111,8 @@ public sealed class PlanningMergeOrchestrator public async Task ContinueAsync(string planningTaskId, CancellationToken ct) { if (!_states.TryGetValue(planningTaskId, out var state) || state.CurrentSubtaskId is null) - throw new InvalidOperationException("no in-progress merge to continue"); + throw new InvalidOperationException( + "no in-progress merge to continue; if the worker was restarted during a conflict, use AbortPlanningMerge to reset the repository"); var current = state.CurrentSubtaskId; var result = await _merge.ContinueMergeAsync(current, ct); @@ -140,13 +142,40 @@ public sealed class PlanningMergeOrchestrator public async Task AbortAsync(string planningTaskId, CancellationToken ct) { if (!_states.TryGetValue(planningTaskId, out var state) || state.CurrentSubtaskId is null) - throw new InvalidOperationException("no in-progress merge to abort"); + { + // No in-memory state — worker may have been restarted while a conflict was paused. + // Check whether the list repo is still mid-merge and abort it directly. 
+ await AbortStatelessAsync(planningTaskId, ct); + return; + } await _merge.AbortMergeAsync(state.CurrentSubtaskId, ct); _states.TryRemove(planningTaskId, out _); await _broadcaster.PlanningMergeAborted(planningTaskId); } + private async Task AbortStatelessAsync(string planningTaskId, CancellationToken ct) + { + string? workingDir; + await using (var ctx = _dbFactory.CreateDbContext()) + { + workingDir = await ctx.Tasks + .Where(t => t.Id == planningTaskId) + .Select(t => t.List.WorkingDir) + .FirstOrDefaultAsync(ct); + } + + if (string.IsNullOrWhiteSpace(workingDir) || !await _git.IsMidMergeAsync(workingDir, ct)) + throw new InvalidOperationException("no in-progress merge to abort"); + + await _git.MergeAbortAsync(workingDir, ct); + _logger.LogInformation( + "Stateless abort of mid-merge for planning task {ParentId} (post-restart recovery)", + planningTaskId); + await _broadcaster.PlanningMergeAborted(planningTaskId); + // Parent remains WaitingForReview — Approve will restart the unit merge from scratch. + } + private async Task DrainAsync(string planningTaskId, CancellationToken ct) { if (!_states.TryGetValue(planningTaskId, out var state)) return; diff --git a/tests/ClaudeDo.Worker.Tests/Planning/PlanningMergeOrchestratorTests.cs b/tests/ClaudeDo.Worker.Tests/Planning/PlanningMergeOrchestratorTests.cs index 0c646e2..0202258 100644 --- a/tests/ClaudeDo.Worker.Tests/Planning/PlanningMergeOrchestratorTests.cs +++ b/tests/ClaudeDo.Worker.Tests/Planning/PlanningMergeOrchestratorTests.cs @@ -245,6 +245,59 @@ public sealed class PlanningMergeOrchestratorTests : IDisposable Assert.False(await git.IsMidMergeAsync(repo.RepoDir, CancellationToken.None)); } + // ─── Stateless abort (post-restart recovery) ─────────────────────────── + + /// <summary> + /// Worker restarted while a conflict was paused: _states is empty but the list repo is + /// still mid-merge. 
AbortAsync must abort the dangling merge, broadcast PlanningMergeAborted, + /// and leave the parent in WaitingForReview so a fresh Approve can retry. + /// </summary> + [Fact] + public async Task AbortAsync_NoState_RepoMidMerge_AbortsAndBroadcasts() + { + var db = NewDb(); + var repo = NewRepo(); + GitRepoFixture.RunGit(repo.RepoDir, "branch", "-m", "main"); + + var (parentId, _, subB, _) = await SeedPlanningThreeChildrenMiddleConflictsAsync(db, repo); + + // Drive orch1 into the conflict pause — repo is now mid-merge. + var (orch1, _) = BuildOrchestrator(db); + await orch1.StartAsync(parentId, "main", CancellationToken.None); + + // Simulate restart: fresh orchestrator has no in-memory state. + var (orch2, spy) = BuildOrchestrator(db); + + await orch2.AbortAsync(parentId, CancellationToken.None); + + var git = new GitService(); + Assert.False(await git.IsMidMergeAsync(repo.RepoDir, CancellationToken.None)); + + using var ctx = db.CreateContext(); + Assert.Equal(TaskStatus.WaitingForReview, ctx.Tasks.Single(t => t.Id == parentId).Status); + + Assert.Contains(spy, c => c.Method == "PlanningMergeAborted" && (string)c.Args[0]! == parentId); + } + + /// <summary> + /// No in-memory state and repo is clean — nothing to abort. Must throw a clear error. + /// </summary> + [Fact] + public async Task AbortAsync_NoState_RepoNotMidMerge_ThrowsClear() + { + var db = NewDb(); + var repo = NewRepo(); + GitRepoFixture.RunGit(repo.RepoDir, "branch", "-m", "main"); + + var (parentId, _, _) = await SeedPlanningWithTwoNonConflictingChildrenAsync(db, repo); + + var (orch, _) = BuildOrchestrator(db); + + var ex = await Assert.ThrowsAsync<InvalidOperationException>( + () => orch.AbortAsync(parentId, CancellationToken.None)); + Assert.Contains("no in-progress merge", ex.Message); + } + private (PlanningMergeOrchestrator orch, List<(string Method, object?[] Args)> calls) BuildOrchestrator(DbFixture db) { var fakeHub = new OrchestratorFakeHubContext();