fix(worker): stateless AbortPlanningMerge after worker restart mid-merge

PlanningMergeOrchestrator._states is in-memory. A worker restart during a
conflict pause left the list repo mid-merge with no recovery path: both
ContinuePlanningMerge and AbortPlanningMerge threw "no in-progress merge",
and re-Approving failed on the IsMidMergeAsync guard.

AbortAsync now falls through to a stateless path when no _states entry exists
(or the entry has no current subtask): it looks up the parent's list WorkingDir
and, if the repo is mid-merge, runs git merge --abort there directly, then
broadcasts PlanningMergeAborted.
Parent remains WaitingForReview — the next Approve restarts the unit merge
(already-Merged child worktrees are skipped as before).

ContinueAsync error message now points to AbortPlanningMerge as the recovery
action. StartAsync mid-merge guard also carries an actionable hint.

Tests: AbortAsync stateless + mid-merge (restart recovery), AbortAsync
stateless + clean repo (clear error).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
mika kuns
2026-06-09 23:35:08 +02:00
parent eee5c99e2f
commit fb1d799b82
2 changed files with 85 additions and 3 deletions

View File

@@ -85,7 +85,8 @@ public sealed class PlanningMergeOrchestrator
}
if (await _git.IsMidMergeAsync(workingDir, ct))
throw new InvalidOperationException("repo is mid-merge");
throw new InvalidOperationException(
"repo is mid-merge; use AbortPlanningMerge to reset the repository, then Approve again");
if (await _git.HasChangesAsync(workingDir, ct))
throw new InvalidOperationException("working tree has uncommitted changes");
@@ -110,7 +111,8 @@ public sealed class PlanningMergeOrchestrator
public async Task ContinueAsync(string planningTaskId, CancellationToken ct)
{
if (!_states.TryGetValue(planningTaskId, out var state) || state.CurrentSubtaskId is null)
throw new InvalidOperationException("no in-progress merge to continue");
throw new InvalidOperationException(
"no in-progress merge to continue; if the worker was restarted during a conflict, use AbortPlanningMerge to reset the repository");
var current = state.CurrentSubtaskId;
var result = await _merge.ContinueMergeAsync(current, ct);
@@ -140,13 +142,40 @@ public sealed class PlanningMergeOrchestrator
/// <summary>
/// Aborts the planning merge for <paramref name="planningTaskId"/>.
/// When an in-memory state entry with a current subtask exists, that subtask's merge is
/// aborted, the state entry is removed and PlanningMergeAborted is broadcast.
/// When there is no entry, or the entry has no current subtask, the request is handed to
/// AbortStatelessAsync.
/// </summary>
/// <param name="planningTaskId">Id of the planning (parent) task whose merge should be aborted.</param>
/// <param name="ct">Cancellation token passed through to the merge and git calls.</param>
public async Task AbortAsync(string planningTaskId, CancellationToken ct)
{
if (!_states.TryGetValue(planningTaskId, out var state) || state.CurrentSubtaskId is null)
// NOTE(review): this hunk is rendered without +/- markers. The bare throw below appears to be
// the pre-change line that the braced fallback block replaces — confirm against the real file.
throw new InvalidOperationException("no in-progress merge to abort");
{
// No in-memory state — worker may have been restarted while a conflict was paused.
// Check whether the list repo is still mid-merge and abort it directly.
await AbortStatelessAsync(planningTaskId, ct);
return;
}
// Normal path: the orchestrator still tracks which subtask merge is paused.
await _merge.AbortMergeAsync(state.CurrentSubtaskId, ct);
_states.TryRemove(planningTaskId, out _);
await _broadcaster.PlanningMergeAborted(planningTaskId);
}
/// <summary>
/// Abort path used when the orchestrator holds no usable in-memory merge state for
/// <paramref name="planningTaskId"/>. Resolves the working directory of the task's list from
/// the database and, if that repository is mid-merge, aborts the merge there and broadcasts
/// PlanningMergeAborted.
/// </summary>
/// <param name="planningTaskId">Id of the planning (parent) task.</param>
/// <param name="ct">Cancellation token for the database query and git calls.</param>
/// <exception cref="InvalidOperationException">
/// No working directory could be resolved, or the repository is not mid-merge.
/// </exception>
private async Task AbortStatelessAsync(string planningTaskId, CancellationToken ct)
{
    // Keep the DbContext scoped to the lookup so it is disposed before any git work starts.
    string? listDir;
    await using (var db = _dbFactory.CreateDbContext())
    {
        listDir = await db.Tasks
            .Where(task => task.Id == planningTaskId)
            .Select(task => task.List.WorkingDir)
            .FirstOrDefaultAsync(ct);
    }

    // Unknown task or blank directory: there is nothing we could abort.
    if (string.IsNullOrWhiteSpace(listDir))
    {
        throw new InvalidOperationException("no in-progress merge to abort");
    }

    // A repository that is not mid-merge has nothing dangling either.
    var midMerge = await _git.IsMidMergeAsync(listDir, ct);
    if (!midMerge)
    {
        throw new InvalidOperationException("no in-progress merge to abort");
    }

    await _git.MergeAbortAsync(listDir, ct);

    _logger.LogInformation(
        "Stateless abort of mid-merge for planning task {ParentId} (post-restart recovery)",
        planningTaskId);

    // The parent task's status is not touched here; only the abort event is published.
    await _broadcaster.PlanningMergeAborted(planningTaskId);
}
private async Task DrainAsync(string planningTaskId, CancellationToken ct)
{
if (!_states.TryGetValue(planningTaskId, out var state)) return;

View File

@@ -245,6 +245,59 @@ public sealed class PlanningMergeOrchestratorTests : IDisposable
Assert.False(await git.IsMidMergeAsync(repo.RepoDir, CancellationToken.None));
}
// ─── Stateless abort (post-restart recovery) ───────────────────────────
/// <summary>
/// Worker restarted while a conflict was paused: the new orchestrator has no in-memory state
/// but the list repo is still mid-merge. AbortAsync must abort the dangling merge, broadcast
/// PlanningMergeAborted, and leave the parent in WaitingForReview so a fresh Approve can retry.
/// </summary>
[Fact]
public async Task AbortAsync_NoState_RepoMidMerge_AbortsAndBroadcasts()
{
    // Arrange: seed a parent whose middle child conflicts, on a branch named "main".
    var db = NewDb();
    var repo = NewRepo();
    GitRepoFixture.RunGit(repo.RepoDir, "branch", "-m", "main");
    // Only the parent id is needed; the child ids are discarded.
    var (parentId, _, _, _) = await SeedPlanningThreeChildrenMiddleConflictsAsync(db, repo);

    // Drive the first orchestrator into the conflict pause — repo is now mid-merge.
    var (orch1, _) = BuildOrchestrator(db);
    await orch1.StartAsync(parentId, "main", CancellationToken.None);

    // Act: simulate a restart — a fresh orchestrator has no in-memory state.
    var (orch2, spy) = BuildOrchestrator(db);
    await orch2.AbortAsync(parentId, CancellationToken.None);

    // Assert: the merge is gone, the parent status is unchanged, and the event was broadcast.
    var git = new GitService();
    Assert.False(await git.IsMidMergeAsync(repo.RepoDir, CancellationToken.None));

    using var ctx = db.CreateContext();
    Assert.Equal(TaskStatus.WaitingForReview, ctx.Tasks.Single(t => t.Id == parentId).Status);
    Assert.Contains(spy, c => c.Method == "PlanningMergeAborted" && (string)c.Args[0]! == parentId);
}
/// <summary>
/// With no in-memory merge state and a repository that is not mid-merge there is nothing
/// to abort, so AbortAsync is expected to fail with a descriptive InvalidOperationException.
/// </summary>
[Fact]
public async Task AbortAsync_NoState_RepoNotMidMerge_ThrowsClear()
{
    // Arrange: seeded planning task on "main"; no merge is ever started.
    var db = NewDb();
    var repo = NewRepo();
    GitRepoFixture.RunGit(repo.RepoDir, "branch", "-m", "main");
    var (parentId, _, _) = await SeedPlanningWithTwoNonConflictingChildrenAsync(db, repo);
    var (orchestrator, _) = BuildOrchestrator(db);

    // Act
    Task Abort() => orchestrator.AbortAsync(parentId, CancellationToken.None);
    var error = await Assert.ThrowsAsync<InvalidOperationException>(Abort);

    // Assert
    Assert.Contains("no in-progress merge", error.Message);
}
private (PlanningMergeOrchestrator orch, List<(string Method, object?[] Args)> calls) BuildOrchestrator(DbFixture db)
{
var fakeHub = new OrchestratorFakeHubContext();