feat(planning): prevent orphaned subtasks via guards + startup repair

Three coordinated guards close the orphan-creation paths:

- CreateChildAsync refuses when the parent is not in a planning phase.
- DiscardPlanningAsync now returns a structured DiscardPlanningOutcome
  and refuses when children are queued or running; callers can opt into
  auto-dequeuing queued children via dequeueQueuedChildren=true. Terminal
  children (Done/Failed/Cancelled) are promoted to top-level instead of
  becoming orphans when the parent's PlanningPhase is reset.
- OrphanRecovery hosted service runs on worker startup and clears
  ParentTaskId on any rows whose parent is missing or no longer in a
  planning phase, mirroring the StaleTaskRecovery pattern.

UI surfaces the block reason: a confirm dialog offers to dequeue queued
children and retry; a running-children block is shown as a hard error
asking the user to cancel first.

WorkerClient now negotiates the JsonStringEnumConverter so the
DiscardPlanningResult enum round-trips correctly over SignalR.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
mika kuns
2026-05-18 16:02:15 +02:00
parent e68bb737e3
commit d094a21e09
17 changed files with 481 additions and 32 deletions

View File

@@ -388,7 +388,8 @@ public sealed class WorkerHub : Microsoft.AspNetCore.SignalR.Hub
}
catch (PlanningLaunchException)
{
await _planning.DiscardAsync(taskId, Context.ConnectionAborted);
// Launch failed before any children could be created; force-cleanup is safe.
await _planning.DiscardAsync(taskId, dequeueQueuedChildren: true, Context.ConnectionAborted);
throw;
}
await Clients.All.SendAsync("TaskUpdated", taskId);
@@ -408,10 +409,12 @@ public sealed class WorkerHub : Microsoft.AspNetCore.SignalR.Hub
await _launcher.LaunchInteractiveAsync(ctx, Context.ConnectionAborted);
}
public async Task DiscardPlanningSessionAsync(string taskId)
public async Task<DiscardPlanningOutcome> DiscardPlanningSessionAsync(string taskId, bool dequeueQueuedChildren = false)
{
await _planning.DiscardAsync(taskId, Context.ConnectionAborted);
await Clients.All.SendAsync("TaskUpdated", taskId);
var outcome = await _planning.DiscardAsync(taskId, dequeueQueuedChildren, Context.ConnectionAborted);
if (outcome.Result == DiscardPlanningResult.Discarded)
await Clients.All.SendAsync("TaskUpdated", taskId);
return outcome;
}
public async Task<int> FinalizePlanningSessionAsync(string taskId, bool queueAgentTasks = true)

View File

@@ -0,0 +1,38 @@
using ClaudeDo.Data;
using ClaudeDo.Data.Repositories;
using Microsoft.EntityFrameworkCore;
namespace ClaudeDo.Worker.Lifecycle;
/// <summary>
/// Hosted service that runs a single repair pass while the worker is starting.
/// The pass targets child rows whose parent is missing or has left its planning
/// phase: such rows are hidden in the UI (their parent no longer renders as a
/// planning header) and the chain coordinator cannot drive them to a terminal
/// state. Clearing <c>ParentTaskId</c> promotes them to top-level tasks, which
/// fixes both problems.
/// </summary>
public sealed class OrphanRecovery : IHostedService
{
    private readonly IDbContextFactory<ClaudeDoDbContext> _dbFactory;
    private readonly ILogger<OrphanRecovery> _logger;

    /// <summary>Creates the recovery service.</summary>
    /// <param name="dbFactory">Factory used to open a short-lived context for the sweep.</param>
    /// <param name="logger">Receives the outcome of the sweep.</param>
    public OrphanRecovery(
        IDbContextFactory<ClaudeDoDbContext> dbFactory,
        ILogger<OrphanRecovery> logger)
        => (_dbFactory, _logger) = (dbFactory, logger);

    /// <summary>
    /// Opens a context, delegates the repair to
    /// <c>TaskRepository.RepairOrphanedChildrenAsync</c>, and logs how many rows it reported.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to context creation and to the repair call.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        await using var db = await _dbFactory.CreateDbContextAsync(cancellationToken);
        var taskRepository = new TaskRepository(db);

        var promotedCount = await taskRepository.RepairOrphanedChildrenAsync(cancellationToken);

        // Nothing repaired is the normal case; log it at Information level and finish.
        if (promotedCount <= 0)
        {
            _logger.LogInformation("Orphan recovery: no orphans found");
            return;
        }

        // A non-zero count is logged as a warning rather than as routine information.
        _logger.LogWarning("Orphan recovery: promoted {Count} orphaned child task(s) to top-level", promotedCount);
    }

    /// <summary>No-op: this service does nothing on shutdown.</summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}

View File

@@ -236,12 +236,17 @@ public sealed class PlanningSessionManager
return children.Count(c => c.Status == TaskStatus.Idle);
}
public async Task DiscardAsync(string taskId, CancellationToken ct)
public async Task<DiscardPlanningOutcome> DiscardAsync(
string taskId,
bool dequeueQueuedChildren,
CancellationToken ct)
{
var (tasks, lists, settings, ctx) = CreateRepos();
await using var __ = ctx;
var ok = await tasks.DiscardPlanningAsync(taskId, ct);
var outcome = await tasks.DiscardPlanningAsync(taskId, dequeueQueuedChildren, ct);
if (outcome.Result != DiscardPlanningResult.Discarded)
return outcome;
await TryCleanupWorktreeAsync(taskId, lists, settings, ct);
@@ -251,8 +256,7 @@ public sealed class PlanningSessionManager
try { Directory.Delete(sessionDir, recursive: true); } catch { }
}
if (!ok)
throw new InvalidOperationException($"Task {taskId} was not in Planning state; nothing to discard.");
return outcome;
}
public async Task<PlanningSessionResumeContext> ResumeAsync(string taskId, CancellationToken ct)

View File

@@ -27,6 +27,7 @@ builder.Services.AddDbContextFactory<ClaudeDoDbContext>(opt =>
builder.Services.AddSingleton(cfg);
builder.Services.AddHostedService<StaleTaskRecovery>();
builder.Services.AddHostedService<OrphanRecovery>();
builder.Services.AddSignalR().AddJsonProtocol(options =>
{
options.PayloadSerializerOptions.Converters.Add(new System.Text.Json.Serialization.JsonStringEnumConverter());