chore(claude-do): fix(worker): TaskRunner bricht ab, wenn StartRunningAsync fehlschlägt
Befund (bestätigt): src/ClaudeDo.Worker/Runner/TaskRunner.cs:101 (RunAsync) und :211 (ContinueAsync) ignorieren das TransitionResult von _state.StartRunningAsync. Race-Szenario: Der QueuePicker claimt Queued→Running atomar; ruft der Override-Pfad (RunNow) kurz danach RunAsync für denselben Task auf, schlägt StartRunningAsync fehl (0 rows affected), der Runner startet Claude aber trotzdem → derselbe Task wird doppelt ausgeführt. ClaudeDo-Task: 44f86be2-7f3d-462e-98b3-eb94c0174eea
This commit is contained in:
@@ -98,7 +98,12 @@ public sealed class TaskRunner
|
|||||||
}
|
}
|
||||||
|
|
||||||
var now = DateTime.UtcNow;
|
var now = DateTime.UtcNow;
|
||||||
await _state.StartRunningAsync(task.Id, now, ct);
|
var startResult = await _state.StartRunningAsync(task.Id, now, ct);
|
||||||
|
if (!startResult.Ok)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("Task {TaskId} skipped: StartRunningAsync rejected ({Reason})", task.Id, startResult.Reason);
|
||||||
|
return;
|
||||||
|
}
|
||||||
await _broadcaster.TaskStarted(slot, task.Id, now);
|
await _broadcaster.TaskStarted(slot, task.Id, now);
|
||||||
|
|
||||||
// Build prompt: title + description + only the OPEN sub-tasks (resolved ones are dropped).
|
// Build prompt: title + description + only the OPEN sub-tasks (resolved ones are dropped).
|
||||||
@@ -208,7 +213,12 @@ public sealed class TaskRunner
|
|||||||
}
|
}
|
||||||
|
|
||||||
var now = DateTime.UtcNow;
|
var now = DateTime.UtcNow;
|
||||||
await _state.StartRunningAsync(taskId, now, ct);
|
var startResult = await _state.StartRunningAsync(taskId, now, ct);
|
||||||
|
if (!startResult.Ok)
|
||||||
|
{
|
||||||
|
_logger.LogWarning("Task {TaskId} skipped: StartRunningAsync rejected ({Reason})", taskId, startResult.Reason);
|
||||||
|
return;
|
||||||
|
}
|
||||||
await _broadcaster.TaskStarted(slot, taskId, now);
|
await _broadcaster.TaskStarted(slot, taskId, now);
|
||||||
|
|
||||||
try
|
try
|
||||||
|
|||||||
98
tests/ClaudeDo.Worker.Tests/Runner/StartRunningGuardTests.cs
Normal file
98
tests/ClaudeDo.Worker.Tests/Runner/StartRunningGuardTests.cs
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
using ClaudeDo.Data.Models;
|
||||||
|
using ClaudeDo.Data.Repositories;
|
||||||
|
using ClaudeDo.Worker.Config;
|
||||||
|
using ClaudeDo.Worker.Hub;
|
||||||
|
using ClaudeDo.Worker.Runner;
|
||||||
|
using ClaudeDo.Worker.Tests.Infrastructure;
|
||||||
|
using Microsoft.Extensions.Logging.Abstractions;
|
||||||
|
using TaskStatus = ClaudeDo.Data.Models.TaskStatus;
|
||||||
|
|
||||||
|
namespace ClaudeDo.Worker.Tests.Runner;
|
||||||
|
|
||||||
|
/// Verifies that RunAsync and ContinueAsync abort cleanly when StartRunningAsync fails
|
||||||
|
/// (e.g. the task is already Running due to a concurrent RunNow / QueuePicker race).
|
||||||
|
public sealed class StartRunningGuardTests : IDisposable
|
||||||
|
{
|
||||||
|
private readonly DbFixture _db = new();
|
||||||
|
private readonly string _tempDir;
|
||||||
|
private readonly WorkerConfig _cfg;
|
||||||
|
|
||||||
|
public StartRunningGuardTests()
|
||||||
|
{
|
||||||
|
_tempDir = Path.Combine(Path.GetTempPath(), $"cd_guard_{Guid.NewGuid():N}");
|
||||||
|
Directory.CreateDirectory(_tempDir);
|
||||||
|
_cfg = new WorkerConfig { SandboxRoot = _tempDir, LogRoot = _tempDir };
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Dispose() { _db.Dispose(); try { Directory.Delete(_tempDir, true); } catch { } }
|
||||||
|
|
||||||
|
private TaskRunner BuildRunner(IClaudeProcess claude)
|
||||||
|
{
|
||||||
|
var dbFactory = _db.CreateFactory();
|
||||||
|
var state = TaskStateServiceBuilder.Build(dbFactory).State;
|
||||||
|
var wt = new WorktreeManager(new ClaudeDo.Data.Git.GitService(), dbFactory, _cfg, NullLogger<WorktreeManager>.Instance);
|
||||||
|
return new TaskRunner(claude, dbFactory, new HubBroadcaster(new CapturingHubContext()), wt,
|
||||||
|
new ClaudeArgsBuilder(), _cfg, NullLogger<TaskRunner>.Instance, state, new TaskRunTokenRegistry());
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task RunAsync_TaskAlreadyRunning_NoProcessStarted_NoRunRecord()
|
||||||
|
{
|
||||||
|
string listId = Guid.NewGuid().ToString(), taskId = Guid.NewGuid().ToString();
|
||||||
|
using (var ctx = _db.CreateContext())
|
||||||
|
{
|
||||||
|
ctx.Lists.Add(new ListEntity { Id = listId, Name = "L", WorkingDir = null, CreatedAt = DateTime.UtcNow });
|
||||||
|
ctx.Tasks.Add(new TaskEntity
|
||||||
|
{
|
||||||
|
Id = taskId, ListId = listId, Title = "Already running",
|
||||||
|
Status = TaskStatus.Running, CreatedAt = DateTime.UtcNow,
|
||||||
|
});
|
||||||
|
await ctx.SaveChangesAsync();
|
||||||
|
}
|
||||||
|
|
||||||
|
var fake = new FakeClaudeProcess();
|
||||||
|
var runner = BuildRunner(fake);
|
||||||
|
|
||||||
|
using (var ctx = _db.CreateContext())
|
||||||
|
await runner.RunAsync((await new TaskRepository(ctx).GetByIdAsync(taskId))!, "slot-1", CancellationToken.None);
|
||||||
|
|
||||||
|
Assert.Equal(0, fake.CallCount);
|
||||||
|
|
||||||
|
using var verify = _db.CreateContext();
|
||||||
|
Assert.Empty(await new TaskRunRepository(verify).GetByTaskIdAsync(taskId));
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task ContinueAsync_TaskAlreadyRunning_NoProcessStarted_NoNewRunRecord()
|
||||||
|
{
|
||||||
|
string listId = Guid.NewGuid().ToString(), taskId = Guid.NewGuid().ToString();
|
||||||
|
using (var ctx = _db.CreateContext())
|
||||||
|
{
|
||||||
|
ctx.Lists.Add(new ListEntity { Id = listId, Name = "L", WorkingDir = null, CreatedAt = DateTime.UtcNow });
|
||||||
|
ctx.Tasks.Add(new TaskEntity
|
||||||
|
{
|
||||||
|
Id = taskId, ListId = listId, Title = "Already running",
|
||||||
|
Status = TaskStatus.Running, CreatedAt = DateTime.UtcNow,
|
||||||
|
});
|
||||||
|
await ctx.SaveChangesAsync();
|
||||||
|
|
||||||
|
await new TaskRunRepository(ctx).AddAsync(new TaskRunEntity
|
||||||
|
{
|
||||||
|
Id = Guid.NewGuid().ToString(), TaskId = taskId, RunNumber = 1,
|
||||||
|
IsRetry = false, Prompt = "p", SessionId = "sess-guard-test",
|
||||||
|
StartedAt = DateTime.UtcNow.AddMinutes(-5), FinishedAt = DateTime.UtcNow.AddMinutes(-1),
|
||||||
|
ExitCode = 0, ResultMarkdown = "ok",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
var fake = new FakeClaudeProcess();
|
||||||
|
var runner = BuildRunner(fake);
|
||||||
|
|
||||||
|
await runner.ContinueAsync(taskId, "follow up", "slot-1", CancellationToken.None);
|
||||||
|
|
||||||
|
Assert.Equal(0, fake.CallCount);
|
||||||
|
|
||||||
|
using var verify = _db.CreateContext();
|
||||||
|
Assert.Single(await new TaskRunRepository(verify).GetByTaskIdAsync(taskId));
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user