fix(worker): address concurrency, cancellation, and resource issues
- claude process: run stdout/stderr reads without ct and rely on kill-on-cancel closing the pipes to unblock them — previously ReadLineAsync(ct) could hang, stalling task slots and shutdown
- task runner: terminal db writes (task_runs, MarkDone, MarkFailed, SetLogPath) now use CancellationToken.None; RunOnceAsync catches OperationCanceledException and finalizes the run row so ContinueAsync can resume
- task repository: GetNextQueuedAgentTaskAsync is now a single UPDATE ... RETURNING statement — closes the TOCTOU window where two loop iterations could dispatch the same queued task
- queue service: dispose the CancellationTokenSource in the slot-completion ContinueWith to stop leaking wait handles
- git service: register a ct callback that kills the git process tree, drain reads without ct, and always reap via WaitForExitAsync(CancellationToken.None) — no more git zombies on cancelled worktree ops
- worktree manager: branch name uses the full task id (dashes stripped) instead of an 8-char prefix, eliminating collision risk

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -104,20 +104,34 @@ public sealed class GitService
|
||||
using var proc = new Process { StartInfo = psi };
|
||||
proc.Start();
|
||||
|
||||
// On cancellation: kill the git process tree. Killing closes the
|
||||
// redirected pipes, which unblocks the ReadToEndAsync calls below
|
||||
// and lets WaitForExitAsync return so the process is reaped.
|
||||
// Without this, cancelling mid-git leaves zombie processes.
|
||||
await using var ctr = ct.Register(() =>
|
||||
{
|
||||
try { proc.Kill(entireProcessTree: true); }
|
||||
catch { /* already exited */ }
|
||||
});
|
||||
|
||||
if (stdinData is not null)
|
||||
{
|
||||
await proc.StandardInput.WriteAsync(stdinData.AsMemory(), ct);
|
||||
proc.StandardInput.Close();
|
||||
}
|
||||
|
||||
var stdoutTask = proc.StandardOutput.ReadToEndAsync(ct);
|
||||
var stderrTask = proc.StandardError.ReadToEndAsync(ct);
|
||||
// Drain output without ct — pipes close when the process exits
|
||||
// (whether naturally or via Kill above), so these always complete.
|
||||
var stdoutTask = proc.StandardOutput.ReadToEndAsync();
|
||||
var stderrTask = proc.StandardError.ReadToEndAsync();
|
||||
|
||||
await proc.WaitForExitAsync(ct);
|
||||
await proc.WaitForExitAsync(CancellationToken.None);
|
||||
|
||||
var stdout = await stdoutTask;
|
||||
var stderr = await stderrTask;
|
||||
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
return (proc.ExitCode, stdout.TrimEnd(), stderr.TrimEnd());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,26 +174,36 @@ public sealed class TaskRepository
|
||||
|
||||
public async Task<TaskEntity?> GetNextQueuedAgentTaskAsync(DateTime now, CancellationToken ct = default)
|
||||
{
|
||||
// Atomically claim the next queued agent task: the UPDATE flips its
|
||||
// status to 'running' in the same statement that returns its row,
|
||||
// eliminating the TOCTOU gap where two queue-loop iterations could
|
||||
// both select the same queued task before either marked it running.
|
||||
// The caller is responsible for populating started_at shortly after.
|
||||
await using var conn = _factory.Open();
|
||||
await using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = """
|
||||
SELECT t.id, t.list_id, t.title, t.description, t.status, t.scheduled_for,
|
||||
t.result, t.log_path, t.created_at, t.started_at, t.finished_at, t.commit_type,
|
||||
t.model, t.system_prompt, t.agent_path
|
||||
FROM tasks t
|
||||
WHERE t.status = 'queued'
|
||||
AND (t.scheduled_for IS NULL OR t.scheduled_for <= @now)
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM task_tags tt
|
||||
JOIN tags tg ON tg.id = tt.tag_id
|
||||
WHERE tt.task_id = t.id AND tg.name = 'agent'
|
||||
UNION
|
||||
SELECT 1 FROM list_tags lt
|
||||
JOIN tags tg ON tg.id = lt.tag_id
|
||||
WHERE lt.list_id = t.list_id AND tg.name = 'agent'
|
||||
)
|
||||
ORDER BY t.created_at ASC
|
||||
LIMIT 1
|
||||
UPDATE tasks
|
||||
SET status = 'running'
|
||||
WHERE id = (
|
||||
SELECT t.id
|
||||
FROM tasks t
|
||||
WHERE t.status = 'queued'
|
||||
AND (t.scheduled_for IS NULL OR t.scheduled_for <= @now)
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM task_tags tt
|
||||
JOIN tags tg ON tg.id = tt.tag_id
|
||||
WHERE tt.task_id = t.id AND tg.name = 'agent'
|
||||
UNION
|
||||
SELECT 1 FROM list_tags lt
|
||||
JOIN tags tg ON tg.id = lt.tag_id
|
||||
WHERE lt.list_id = t.list_id AND tg.name = 'agent'
|
||||
)
|
||||
ORDER BY t.created_at ASC
|
||||
LIMIT 1
|
||||
)
|
||||
RETURNING id, list_id, title, description, status, scheduled_for,
|
||||
result, log_path, created_at, started_at, finished_at, commit_type,
|
||||
model, system_prompt, agent_path
|
||||
""";
|
||||
cmd.Parameters.AddWithValue("@now", now.ToString("o"));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user