chore(claude-do): fix(worker): TaskRunner bricht ab, wenn StartRunningAsync fehlschlägt

Befund (bestätigt): src/ClaudeDo.Worker/Runner/TaskRunner.cs:101 (RunAsync) und :211 (ContinueAsync) ignorieren das TransitionResult von _state.StartRunningAsync. Race-Szenario: Der QueuePicker claimt Queued→Running atomar; ruft der Override-Pfad (RunNow) kurz danach RunAsync für denselben Task auf, schlägt StartRunningAsync fehl (0 rows affected), der Runner startet Claude aber trotzdem → derselb

ClaudeDo-Task: 44f86be2-7f3d-462e-98b3-eb94c0174eea
This commit is contained in:
mika kuns
2026-06-09 23:32:57 +02:00
parent eee5c99e2f
commit 12fdb55a8e
2 changed files with 110 additions and 2 deletions

View File

@@ -98,7 +98,12 @@ public sealed class TaskRunner
} }
var now = DateTime.UtcNow; var now = DateTime.UtcNow;
await _state.StartRunningAsync(task.Id, now, ct); var startResult = await _state.StartRunningAsync(task.Id, now, ct);
if (!startResult.Ok)
{
_logger.LogWarning("Task {TaskId} skipped: StartRunningAsync rejected ({Reason})", task.Id, startResult.Reason);
return;
}
await _broadcaster.TaskStarted(slot, task.Id, now); await _broadcaster.TaskStarted(slot, task.Id, now);
// Build prompt: title + description + only the OPEN sub-tasks (resolved ones are dropped). // Build prompt: title + description + only the OPEN sub-tasks (resolved ones are dropped).
@@ -208,7 +213,12 @@ public sealed class TaskRunner
} }
var now = DateTime.UtcNow; var now = DateTime.UtcNow;
await _state.StartRunningAsync(taskId, now, ct); var startResult = await _state.StartRunningAsync(taskId, now, ct);
if (!startResult.Ok)
{
_logger.LogWarning("Task {TaskId} skipped: StartRunningAsync rejected ({Reason})", taskId, startResult.Reason);
return;
}
await _broadcaster.TaskStarted(slot, taskId, now); await _broadcaster.TaskStarted(slot, taskId, now);
try try

View File

@@ -0,0 +1,98 @@
using ClaudeDo.Data.Models;
using ClaudeDo.Data.Repositories;
using ClaudeDo.Worker.Config;
using ClaudeDo.Worker.Hub;
using ClaudeDo.Worker.Runner;
using ClaudeDo.Worker.Tests.Infrastructure;
using Microsoft.Extensions.Logging.Abstractions;
using TaskStatus = ClaudeDo.Data.Models.TaskStatus;
namespace ClaudeDo.Worker.Tests.Runner;
/// <summary>
/// Verifies that <c>RunAsync</c> and <c>ContinueAsync</c> abort cleanly when
/// <c>StartRunningAsync</c> fails (e.g. the task is already Running due to a
/// concurrent RunNow / QueuePicker race).
/// </summary>
public sealed class StartRunningGuardTests : IDisposable
{
    private readonly DbFixture _db = new();
    private readonly string _tempDir;
    private readonly WorkerConfig _cfg;

    /// <summary>
    /// Creates a unique temp directory and points both the sandbox root and the
    /// log root of the worker configuration at it.
    /// </summary>
    public StartRunningGuardTests()
    {
        _tempDir = Path.Combine(Path.GetTempPath(), $"cd_guard_{Guid.NewGuid():N}");
        Directory.CreateDirectory(_tempDir);
        _cfg = new WorkerConfig { SandboxRoot = _tempDir, LogRoot = _tempDir };
    }

    /// <summary>
    /// Disposes the database fixture, then removes the temp directory recursively.
    /// </summary>
    public void Dispose()
    {
        _db.Dispose();
        try
        {
            Directory.Delete(_tempDir, true);
        }
        catch
        {
            // Best-effort cleanup: a failure to delete the temp directory is ignored on purpose.
        }
    }

    /// <summary>
    /// Wires up a <see cref="TaskRunner"/> around the supplied process stand-in,
    /// using the fixture database, a capturing hub context and null loggers.
    /// </summary>
    /// <param name="claude">Process implementation handed to the runner.</param>
    /// <returns>The runner under test.</returns>
    private TaskRunner BuildRunner(IClaudeProcess claude)
    {
        var factory = _db.CreateFactory();
        var stateService = TaskStateServiceBuilder.Build(factory).State;
        var worktrees = new WorktreeManager(
            new ClaudeDo.Data.Git.GitService(),
            factory,
            _cfg,
            NullLogger<WorktreeManager>.Instance);
        var broadcaster = new HubBroadcaster(new CapturingHubContext());

        return new TaskRunner(
            claude,
            factory,
            broadcaster,
            worktrees,
            new ClaudeArgsBuilder(),
            _cfg,
            NullLogger<TaskRunner>.Instance,
            stateService,
            new TaskRunTokenRegistry());
    }

    /// <summary>
    /// Inserts a list and a task whose status is already <c>Running</c>, optionally
    /// followed by one finished run record for that task, all through one context.
    /// </summary>
    /// <param name="withFinishedRun">When true, a completed run (run number 1) is added after the task is saved.</param>
    /// <returns>The id of the seeded task.</returns>
    private async Task<string> SeedRunningTaskAsync(bool withFinishedRun)
    {
        var listId = Guid.NewGuid().ToString();
        var taskId = Guid.NewGuid().ToString();

        using var ctx = _db.CreateContext();

        ctx.Lists.Add(new ListEntity
        {
            Id = listId,
            Name = "L",
            WorkingDir = null,
            CreatedAt = DateTime.UtcNow,
        });
        ctx.Tasks.Add(new TaskEntity
        {
            Id = taskId,
            ListId = listId,
            Title = "Already running",
            Status = TaskStatus.Running,
            CreatedAt = DateTime.UtcNow,
        });
        await ctx.SaveChangesAsync();

        if (withFinishedRun)
        {
            var finishedRun = new TaskRunEntity
            {
                Id = Guid.NewGuid().ToString(),
                TaskId = taskId,
                RunNumber = 1,
                IsRetry = false,
                Prompt = "p",
                SessionId = "sess-guard-test",
                StartedAt = DateTime.UtcNow.AddMinutes(-5),
                FinishedAt = DateTime.UtcNow.AddMinutes(-1),
                ExitCode = 0,
                ResultMarkdown = "ok",
            };
            await new TaskRunRepository(ctx).AddAsync(finishedRun);
        }

        return taskId;
    }

    /// <summary>
    /// A task that is already Running is passed to <c>RunAsync</c>; the fake process
    /// must report zero calls and no run record may exist for the task afterwards.
    /// </summary>
    [Fact]
    public async Task RunAsync_TaskAlreadyRunning_NoProcessStarted_NoRunRecord()
    {
        var taskId = await SeedRunningTaskAsync(withFinishedRun: false);
        var fake = new FakeClaudeProcess();
        var runner = BuildRunner(fake);

        using (var ctx = _db.CreateContext())
        {
            var task = await new TaskRepository(ctx).GetByIdAsync(taskId);
            await runner.RunAsync(task!, "slot-1", CancellationToken.None);
        }

        Assert.Equal(0, fake.CallCount);

        using var verify = _db.CreateContext();
        var runs = await new TaskRunRepository(verify).GetByTaskIdAsync(taskId);
        Assert.Empty(runs);
    }

    /// <summary>
    /// A task that is already Running and has one earlier run is passed to
    /// <c>ContinueAsync</c>; the fake process must report zero calls and the task
    /// must still have exactly that one run record.
    /// </summary>
    [Fact]
    public async Task ContinueAsync_TaskAlreadyRunning_NoProcessStarted_NoNewRunRecord()
    {
        var taskId = await SeedRunningTaskAsync(withFinishedRun: true);
        var fake = new FakeClaudeProcess();
        var runner = BuildRunner(fake);

        await runner.ContinueAsync(taskId, "follow up", "slot-1", CancellationToken.None);

        Assert.Equal(0, fake.CallCount);

        using var verify = _db.CreateContext();
        var runs = await new TaskRunRepository(verify).GetByTaskIdAsync(taskId);
        Assert.Single(runs);
    }
}