From 12fdb55a8ea982bdf0b730a386ad4c2844eecf3a Mon Sep 17 00:00:00 2001 From: mika kuns Date: Tue, 9 Jun 2026 23:32:57 +0200 Subject: [PATCH] =?UTF-8?q?=EF=BB=BFchore(claude-do):=20fix(worker):=20Tas?= =?UTF-8?q?kRunner=20bricht=20ab,=20wenn=20StartRunningAsync=20fe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Befund (bestätigt): src/ClaudeDo.Worker/Runner/TaskRunner.cs:101 (RunAsync) und :211 (ContinueAsync) ignorieren das TransitionResult von _state.StartRunningAsync. Race-Szenario: Der QueuePicker claimt Queued→Running atomar; ruft der Override-Pfad (RunNow) kurz danach RunAsync für denselben Task auf, schlägt StartRunningAsync fehl (0 rows affected), der Runner startet Claude aber trotzdem → derselbe Task läuft doppelt. ClaudeDo-Task: 44f86be2-7f3d-462e-98b3-eb94c0174eea --- src/ClaudeDo.Worker/Runner/TaskRunner.cs | 14 ++- .../Runner/StartRunningGuardTests.cs | 98 +++++++++++++++++++ 2 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 tests/ClaudeDo.Worker.Tests/Runner/StartRunningGuardTests.cs diff --git a/src/ClaudeDo.Worker/Runner/TaskRunner.cs b/src/ClaudeDo.Worker/Runner/TaskRunner.cs index b481d86..fd4591b 100644 --- a/src/ClaudeDo.Worker/Runner/TaskRunner.cs +++ b/src/ClaudeDo.Worker/Runner/TaskRunner.cs @@ -98,7 +98,12 @@ public sealed class TaskRunner } var now = DateTime.UtcNow; - await _state.StartRunningAsync(task.Id, now, ct); + var startResult = await _state.StartRunningAsync(task.Id, now, ct); + if (!startResult.Ok) + { + _logger.LogWarning("Task {TaskId} skipped: StartRunningAsync rejected ({Reason})", task.Id, startResult.Reason); + return; + } await _broadcaster.TaskStarted(slot, task.Id, now); // Build prompt: title + description + only the OPEN sub-tasks (resolved ones are dropped). 
@@ -208,7 +213,12 @@ public sealed class TaskRunner } var now = DateTime.UtcNow; - await _state.StartRunningAsync(taskId, now, ct); + var startResult = await _state.StartRunningAsync(taskId, now, ct); + if (!startResult.Ok) + { + _logger.LogWarning("Task {TaskId} skipped: StartRunningAsync rejected ({Reason})", taskId, startResult.Reason); + return; + } await _broadcaster.TaskStarted(slot, taskId, now); try diff --git a/tests/ClaudeDo.Worker.Tests/Runner/StartRunningGuardTests.cs b/tests/ClaudeDo.Worker.Tests/Runner/StartRunningGuardTests.cs new file mode 100644 index 0000000..c30451b --- /dev/null +++ b/tests/ClaudeDo.Worker.Tests/Runner/StartRunningGuardTests.cs @@ -0,0 +1,98 @@ +using ClaudeDo.Data.Models; +using ClaudeDo.Data.Repositories; +using ClaudeDo.Worker.Config; +using ClaudeDo.Worker.Hub; +using ClaudeDo.Worker.Runner; +using ClaudeDo.Worker.Tests.Infrastructure; +using Microsoft.Extensions.Logging.Abstractions; +using TaskStatus = ClaudeDo.Data.Models.TaskStatus; + +namespace ClaudeDo.Worker.Tests.Runner; + +/// Verifies that RunAsync and ContinueAsync abort cleanly when StartRunningAsync fails +/// (e.g. the task is already Running due to a concurrent RunNow / QueuePicker race). 
+public sealed class StartRunningGuardTests : IDisposable
+{
+    private readonly DbFixture _db = new();
+    private readonly string _tempDir;
+    private readonly WorkerConfig _cfg;
+
+    public StartRunningGuardTests()
+    {
+        _tempDir = Path.Combine(Path.GetTempPath(), $"cd_guard_{Guid.NewGuid():N}");
+        Directory.CreateDirectory(_tempDir);
+        _cfg = new WorkerConfig { SandboxRoot = _tempDir, LogRoot = _tempDir };
+    }
+
+    public void Dispose()
+    {
+        _db.Dispose();
+        try { Directory.Delete(_tempDir, true); }
+        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException) { /* best-effort temp cleanup */ }
+    }
+
+    // Builds a TaskRunner on the fixture database that uses the given Claude process double.
+    private TaskRunner BuildRunner(IClaudeProcess claude)
+    {
+        var dbFactory = _db.CreateFactory();
+        var state = TaskStateServiceBuilder.Build(dbFactory).State;
+        var wt = new WorktreeManager(new ClaudeDo.Data.Git.GitService(), dbFactory, _cfg, NullLogger.Instance);
+        return new TaskRunner(claude, dbFactory, new HubBroadcaster(new CapturingHubContext()), wt,
+            new ClaudeArgsBuilder(), _cfg, NullLogger.Instance, state, new TaskRunTokenRegistry());
+    }
+
+    // Seeds one list and one task whose status is already Running; returns the task id.
+    private async Task<string> SeedRunningTaskAsync()
+    {
+        string listId = Guid.NewGuid().ToString(), taskId = Guid.NewGuid().ToString();
+        using var ctx = _db.CreateContext();
+        ctx.Lists.Add(new ListEntity { Id = listId, Name = "L", WorkingDir = null, CreatedAt = DateTime.UtcNow });
+        ctx.Tasks.Add(new TaskEntity
+        {
+            Id = taskId, ListId = listId, Title = "Already running",
+            Status = TaskStatus.Running, CreatedAt = DateTime.UtcNow,
+        });
+        await ctx.SaveChangesAsync();
+        return taskId;
+    }
+
+    [Fact]
+    public async Task RunAsync_TaskAlreadyRunning_NoProcessStarted_NoRunRecord()
+    {
+        var taskId = await SeedRunningTaskAsync();
+        var fake = new FakeClaudeProcess();
+        var runner = BuildRunner(fake);
+
+        using (var ctx = _db.CreateContext())
+            await runner.RunAsync((await new TaskRepository(ctx).GetByIdAsync(taskId))!, "slot-1", CancellationToken.None);
+
+        Assert.Equal(0, fake.CallCount); // the fake Claude process must not have been invoked
+        using var verify = _db.CreateContext();
+        Assert.Empty(await new TaskRunRepository(verify).GetByTaskIdAsync(taskId));
+    }
+
+    [Fact]
+    public async Task ContinueAsync_TaskAlreadyRunning_NoProcessStarted_NoNewRunRecord()
+    {
+        var taskId = await SeedRunningTaskAsync();
+        using (var ctx = _db.CreateContext())
+        {
+            // Pre-existing finished run; it must remain the only run record afterwards.
+            await new TaskRunRepository(ctx).AddAsync(new TaskRunEntity
+            {
+                Id = Guid.NewGuid().ToString(), TaskId = taskId, RunNumber = 1,
+                IsRetry = false, Prompt = "p", SessionId = "sess-guard-test",
+                StartedAt = DateTime.UtcNow.AddMinutes(-5), FinishedAt = DateTime.UtcNow.AddMinutes(-1),
+                ExitCode = 0, ResultMarkdown = "ok",
+            });
+        }
+        var fake = new FakeClaudeProcess();
+        var runner = BuildRunner(fake);
+
+        await runner.ContinueAsync(taskId, "follow up", "slot-1", CancellationToken.None);
+
+        Assert.Equal(0, fake.CallCount); // the fake Claude process must not have been invoked
+        using var verify = _db.CreateContext();
+        Assert.Single(await new TaskRunRepository(verify).GetByTaskIdAsync(taskId));
+    }
+}