Slice 3 of the worker state and queue consolidation refactor. - Add IQueueWaker / QueueWaker (singleton holding the wake semaphore). - Add IQueuePicker / QueuePicker; raw SQL UPDATE...RETURNING moves out of TaskRepository.GetNextQueuedAgentTaskAsync (deleted) and now also filters on blocked_by_task_id IS NULL and writes started_at on claim. - TaskStateService takes IQueueWaker directly; the Func<QueueService> indirection is gone. State transitions to Queued auto-wake the dispatcher. - QueueService waits via the shared waker and dispatches via the picker. - Drop explicit _queue.WakeQueue() calls in WorkerHub.QueuePlanningSubtasksAsync and ExternalMcpService.AddTask. The hub WakeQueue endpoint stays for diagnostics, delegating to _waker.Wake(). - Migrate tests; pre-existing flaky AppSettings/ExternalMcp tests untouched. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
223 lines
7.7 KiB
C#
223 lines
7.7 KiB
C#
using ClaudeDo.Data;
|
|
using ClaudeDo.Data.Models;
|
|
using ClaudeDo.Data.Repositories;
|
|
using ClaudeDo.Worker.Config;
|
|
using ClaudeDo.Worker.Queue;
|
|
using ClaudeDo.Worker.Runner;
|
|
using Microsoft.EntityFrameworkCore;
|
|
|
|
namespace ClaudeDo.Worker.Services;
|
|
|
|
/// <summary>
/// Immutable description of the task currently occupying one of the
/// <c>QueueService</c> execution slots.
/// </summary>
public sealed class QueueSlotState
{
    /// <summary>Identifier of the task that holds the slot.</summary>
    public required string TaskId { get; init; }

    /// <summary>Timestamp recorded when the slot was handed to the task.</summary>
    public required DateTime StartedAt { get; init; }

    /// <summary>Cancellation source whose token is passed to the run occupying the slot.</summary>
    public required CancellationTokenSource Cts { get; init; }
}
|
|
|
|
/// <summary>
/// Background dispatcher that runs tasks in two independent slots:
/// a "queue" slot, filled by the dispatch loop with whatever <see cref="IQueuePicker"/> claims,
/// and an "override" slot, filled on demand by <see cref="RunNow"/> and <see cref="ContinueTask"/>.
/// Each slot holds at most one task at a time.
/// </summary>
public sealed class QueueService : BackgroundService
{
    private readonly IDbContextFactory<ClaudeDoDbContext> _dbFactory;
    private readonly TaskRunner _runner;
    private readonly WorkerConfig _cfg;
    private readonly ILogger<QueueService> _logger;
    private readonly QueueWaker _waker;
    private readonly IQueuePicker _picker;

    // Guards every write to the slot fields. The fields are volatile so that
    // GetActive and the dispatch loop can take a lock-free snapshot.
    private readonly object _lock = new();
    private volatile QueueSlotState? _queueSlot;
    private volatile QueueSlotState? _overrideSlot;

    /// <summary>Creates the service; all collaborators are supplied by the caller.</summary>
    public QueueService(
        IDbContextFactory<ClaudeDoDbContext> dbFactory,
        TaskRunner runner,
        WorkerConfig cfg,
        ILogger<QueueService> logger,
        QueueWaker waker,
        IQueuePicker picker)
    {
        _dbFactory = dbFactory;
        _runner = runner;
        _cfg = cfg;
        _logger = logger;
        _waker = waker;
        _picker = picker;
    }

    /// <summary>
    /// Returns a snapshot of the occupied slots, labelled <c>"queue"</c> and <c>"override"</c>.
    /// The slots are read without taking the lock, so the two entries are not
    /// guaranteed to come from the same instant.
    /// </summary>
    public IReadOnlyList<(string slot, string taskId, DateTime startedAt)> GetActive()
    {
        var list = new List<(string, string, DateTime)>();

        // Copy each volatile field to a local so the null check and the reads see the same object.
        var q = _queueSlot;
        if (q is not null) list.Add(("queue", q.TaskId, q.StartedAt));

        var o = _overrideSlot;
        if (o is not null) list.Add(("override", o.TaskId, o.StartedAt));

        return list;
    }

    /// <summary>
    /// Starts <paramref name="taskId"/> in the override slot without waiting for it to finish.
    /// </summary>
    /// <param name="taskId">Identifier of the task to run.</param>
    /// <exception cref="KeyNotFoundException">No task with that id exists.</exception>
    /// <exception cref="InvalidOperationException">
    /// The task is already running in the queue slot, or the override slot is occupied.
    /// </exception>
    public async Task RunNow(string taskId)
    {
        using (var context = _dbFactory.CreateDbContext())
        {
            var taskRepo = new TaskRepository(context);
            var exists = await taskRepo.GetByIdAsync(taskId);
            if (exists is null)
                throw new KeyNotFoundException($"Task '{taskId}' not found.");
        }

        lock (_lock)
        {
            if (_queueSlot?.TaskId == taskId)
                throw new InvalidOperationException("task is already running in queue slot");
            if (_overrideSlot is not null)
                throw new InvalidOperationException("override slot busy");

            var cts = new CancellationTokenSource();
            _overrideSlot = new QueueSlotState { TaskId = taskId, StartedAt = DateTime.UtcNow, Cts = cts };

            // Fire and forget: the continuation frees the slot and disposes the CTS
            // once the run has finished, whatever its outcome.
            _ = RunInSlotAsync(taskId, "override", cts.Token).ContinueWith(t =>
            {
                if (t.IsFaulted)
                    _logger.LogError(t.Exception, "RunInSlotAsync failed for task {TaskId}", taskId);
                lock (_lock) { _overrideSlot = null; }
                cts.Dispose();
            }, TaskScheduler.Default);
        }
    }

    /// <summary>
    /// Continues an existing task with a follow-up prompt in the override slot,
    /// without waiting for the continuation to finish.
    /// </summary>
    /// <param name="taskId">Identifier of the task to continue.</param>
    /// <param name="followUpPrompt">Prompt forwarded to the runner.</param>
    /// <returns>The same <paramref name="taskId"/> that was passed in.</returns>
    /// <exception cref="KeyNotFoundException">No task with that id exists.</exception>
    /// <exception cref="InvalidOperationException">
    /// The task's stored status is Running, it occupies the queue slot, or the override slot is occupied.
    /// </exception>
    public async Task<string> ContinueTask(string taskId, string followUpPrompt)
    {
        using var context = _dbFactory.CreateDbContext();
        var taskRepo = new TaskRepository(context);
        var task = await taskRepo.GetByIdAsync(taskId)
            ?? throw new KeyNotFoundException($"Task '{taskId}' not found.");

        if (task.Status == Data.Models.TaskStatus.Running)
            throw new InvalidOperationException("task is already running");

        lock (_lock)
        {
            if (_queueSlot?.TaskId == taskId)
                throw new InvalidOperationException("task is already running in queue slot");
            if (_overrideSlot is not null)
                throw new InvalidOperationException("override slot busy");

            var cts = new CancellationTokenSource();
            _overrideSlot = new QueueSlotState { TaskId = taskId, StartedAt = DateTime.UtcNow, Cts = cts };

            // Fire and forget: the continuation frees the slot and disposes the CTS
            // once the continuation run has finished, whatever its outcome.
            _ = RunContinueInSlotAsync(taskId, followUpPrompt, cts.Token).ContinueWith(t =>
            {
                if (t.IsFaulted)
                    _logger.LogError(t.Exception, "RunContinueInSlotAsync failed for task {TaskId}", taskId);
                lock (_lock) { _overrideSlot = null; }
                cts.Dispose();
            }, TaskScheduler.Default);
        }

        return taskId;
    }

    /// <summary>
    /// Requests cancellation of <paramref name="taskId"/> if it currently occupies either slot.
    /// </summary>
    /// <param name="taskId">Identifier of the task to cancel.</param>
    /// <returns>
    /// <c>true</c> when the task was found in a slot and its token was cancelled; otherwise <c>false</c>.
    /// </returns>
    public bool CancelTask(string taskId)
    {
        // Cancelling under the lock keeps the CTS alive: the completion continuations
        // clear the slot under the same lock before they dispose the CTS.
        lock (_lock)
        {
            if (_queueSlot is not null && _queueSlot.TaskId == taskId)
            {
                _queueSlot.Cts.Cancel();
                return true;
            }
            if (_overrideSlot is not null && _overrideSlot.TaskId == taskId)
            {
                _overrideSlot.Cts.Cancel();
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Dispatch loop. Sleeps until the waker is signalled or the backstop timer ticks,
    /// then, if the queue slot is free, claims the next task from the picker and starts it.
    /// </summary>
    /// <param name="stoppingToken">Signalled when the host is shutting the service down.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        _logger.LogInformation("QueueService started");

        using var timer = new PeriodicTimer(TimeSpan.FromMilliseconds(_cfg.QueueBackstopIntervalMs));

        // The waits are carried across iterations. Task.WhenAny returns as soon as ONE of
        // them completes; the other is still in flight. Starting a fresh wait every iteration
        // would (a) call PeriodicTimer.WaitForNextTickAsync while a previous call is still
        // pending, which PeriodicTimer rejects with InvalidOperationException, and
        // (b) leave orphaned wake waiters behind that can swallow later Wake() signals.
        Task? pendingWake = null;
        Task<bool>? pendingTick = null;

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                // Wait for wake signal or backstop timer.
                pendingWake ??= _waker.WaitAsync(stoppingToken);
                pendingTick ??= timer.WaitForNextTickAsync(stoppingToken).AsTask();

                await Task.WhenAny(pendingWake, pendingTick);

                // Retire only the wait(s) that actually finished. The field is cleared
                // before awaiting so a failed wait is never reused; awaiting surfaces
                // cancellation or faults to the catch blocks below.
                if (pendingWake.IsCompleted)
                {
                    var finishedWake = pendingWake;
                    pendingWake = null;
                    await finishedWake;
                }

                if (pendingTick.IsCompleted)
                {
                    var finishedTick = pendingTick;
                    pendingTick = null;
                    await finishedTick;
                }

                if (_queueSlot is not null) continue;

                var task = await _picker.ClaimNextAsync(DateTime.UtcNow, stoppingToken);
                if (task is null) continue;

                lock (_lock)
                {
                    // Defensive re-check: within this class only this loop ever fills the queue slot.
                    if (_queueSlot is not null) continue;

                    // Linked to the stopping token so that host shutdown also cancels the running task.
                    var cts = CancellationTokenSource.CreateLinkedTokenSource(stoppingToken);
                    _queueSlot = new QueueSlotState { TaskId = task.Id, StartedAt = DateTime.UtcNow, Cts = cts };

                    _ = RunInSlotAsync(task.Id, "queue", cts.Token).ContinueWith(t =>
                    {
                        if (t.IsFaulted)
                            _logger.LogError(t.Exception, "RunInSlotAsync failed for task {TaskId} in queue slot", task.Id);
                        lock (_lock) { _queueSlot = null; }
                        cts.Dispose();
                        _waker.Wake(); // Check for next task immediately.
                    }, TaskScheduler.Default);
                }
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                break;
            }
            catch (Exception ex)
            {
                // Keep the dispatcher alive; the next wake or tick retries.
                _logger.LogError(ex, "QueueService loop error");
            }
        }

        _logger.LogInformation("QueueService stopping");
    }

    /// <summary>
    /// Loads the task and hands it to the runner for the given slot.
    /// Every exception is logged and swallowed, so the returned task does not fault.
    /// </summary>
    /// <param name="taskId">Identifier of the task to run.</param>
    /// <param name="slot">Slot label passed through to the runner ("queue" or "override").</param>
    /// <param name="ct">Token that cancels the run.</param>
    private async Task RunInSlotAsync(string taskId, string slot, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("Starting task {TaskId} in {Slot} slot", taskId, slot);

            TaskEntity task;
            // The context is disposed before the run starts.
            using (var context = _dbFactory.CreateDbContext())
            {
                var taskRepo = new TaskRepository(context);
                task = await taskRepo.GetByIdAsync(taskId, ct)
                    ?? throw new KeyNotFoundException($"Task '{taskId}' not found.");
            }

            await _runner.RunAsync(task, slot, ct);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Slot runner error for task {TaskId}", taskId);
        }
    }

    /// <summary>
    /// Hands a follow-up prompt for an existing task to the runner in the override slot.
    /// Every exception is logged and swallowed, so the returned task does not fault.
    /// </summary>
    /// <param name="taskId">Identifier of the task to continue.</param>
    /// <param name="followUpPrompt">Prompt forwarded to the runner.</param>
    /// <param name="ct">Token that cancels the continuation.</param>
    private async Task RunContinueInSlotAsync(string taskId, string followUpPrompt, CancellationToken ct)
    {
        try
        {
            _logger.LogInformation("Continuing task {TaskId} in override slot", taskId);
            await _runner.ContinueAsync(taskId, followUpPrompt, "override", ct);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Continue runner error for task {TaskId}", taskId);
        }
    }
}
|