fix(worker): address concurrency, cancellation, and resource issues
- claude process: run stdout/stderr reads without ct and rely on kill-on-cancel closing the pipes to unblock them — previously ReadLineAsync(ct) could hang, stalling task slots and shutdown
- task runner: terminal db writes (task_runs, MarkDone, MarkFailed, SetLogPath) now use CancellationToken.None; RunOnceAsync catches OperationCanceledException (OCE) and finalizes the run row so ContinueAsync can resume
- task repository: GetNextQueuedAgentTaskAsync is now a single UPDATE ... RETURNING statement — closes the TOCTOU window where two loop iterations could dispatch the same queued task
- queue service: dispose the CancellationTokenSource in the slot-completion ContinueWith to stop leaking wait handles
- git service: register a ct callback that calls Kill(processTree), drain reads without ct, and always reap via WaitForExitAsync(CancellationToken.None) — no more git zombies on cancelled worktree ops
- worktree manager: branch name uses the full task id (dashes stripped) instead of an 8-char prefix, eliminating collision risk

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -174,26 +174,36 @@ public sealed class TaskRepository
|
||||
|
||||
public async Task<TaskEntity?> GetNextQueuedAgentTaskAsync(DateTime now, CancellationToken ct = default)
|
||||
{
|
||||
// Atomically claim the next queued agent task: the UPDATE flips its
|
||||
// status to 'running' in the same statement that returns its row,
|
||||
// eliminating the TOCTOU gap where two queue-loop iterations could
|
||||
// both select the same queued task before either marked it running.
|
||||
// The caller is responsible for populating started_at shortly after.
|
||||
await using var conn = _factory.Open();
|
||||
await using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = """
|
||||
SELECT t.id, t.list_id, t.title, t.description, t.status, t.scheduled_for,
|
||||
t.result, t.log_path, t.created_at, t.started_at, t.finished_at, t.commit_type,
|
||||
t.model, t.system_prompt, t.agent_path
|
||||
FROM tasks t
|
||||
WHERE t.status = 'queued'
|
||||
AND (t.scheduled_for IS NULL OR t.scheduled_for <= @now)
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM task_tags tt
|
||||
JOIN tags tg ON tg.id = tt.tag_id
|
||||
WHERE tt.task_id = t.id AND tg.name = 'agent'
|
||||
UNION
|
||||
SELECT 1 FROM list_tags lt
|
||||
JOIN tags tg ON tg.id = lt.tag_id
|
||||
WHERE lt.list_id = t.list_id AND tg.name = 'agent'
|
||||
)
|
||||
ORDER BY t.created_at ASC
|
||||
LIMIT 1
|
||||
UPDATE tasks
|
||||
SET status = 'running'
|
||||
WHERE id = (
|
||||
SELECT t.id
|
||||
FROM tasks t
|
||||
WHERE t.status = 'queued'
|
||||
AND (t.scheduled_for IS NULL OR t.scheduled_for <= @now)
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM task_tags tt
|
||||
JOIN tags tg ON tg.id = tt.tag_id
|
||||
WHERE tt.task_id = t.id AND tg.name = 'agent'
|
||||
UNION
|
||||
SELECT 1 FROM list_tags lt
|
||||
JOIN tags tg ON tg.id = lt.tag_id
|
||||
WHERE lt.list_id = t.list_id AND tg.name = 'agent'
|
||||
)
|
||||
ORDER BY t.created_at ASC
|
||||
LIMIT 1
|
||||
)
|
||||
RETURNING id, list_id, title, description, status, scheduled_for,
|
||||
result, log_path, created_at, started_at, finished_at, commit_type,
|
||||
model, system_prompt, agent_path
|
||||
""";
|
||||
cmd.Parameters.AddWithValue("@now", now.ToString("o"));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user