refactor(worker/queue): split queue waker and picker, auto-wake on enqueue

Slice 3 of the worker state and queue consolidation refactor.

- Add IQueueWaker / QueueWaker (singleton holding the wake semaphore).
- Add IQueuePicker / QueuePicker; the raw SQL UPDATE...RETURNING moves out of
  TaskRepository.GetNextQueuedAgentTaskAsync (now deleted) into the picker.
  The query now also filters on blocked_by_task_id IS NULL and writes
  started_at on claim.
- TaskStateService takes IQueueWaker directly; the Func<QueueService>
  indirection is gone. State transitions to Queued auto-wake the dispatcher.
- QueueService waits via the shared waker and dispatches via the picker.
- Drop explicit _queue.WakeQueue() calls in WorkerHub.QueuePlanningSubtasksAsync
  and ExternalMcpService.AddTask. The hub WakeQueue endpoint stays for
  diagnostics, delegating to _waker.Wake().
- Migrate tests; pre-existing flaky AppSettings/ExternalMcp tests untouched.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mika Kuns
2026-04-27 12:05:54 +02:00
parent 8823265e5a
commit 064a903076
18 changed files with 354 additions and 191 deletions

View File

@@ -2,6 +2,7 @@ using ClaudeDo.Data;
using ClaudeDo.Data.Models;
using ClaudeDo.Data.Repositories;
using ClaudeDo.Worker.Config;
using ClaudeDo.Worker.Queue;
using ClaudeDo.Worker.Runner;
using Microsoft.EntityFrameworkCore;
@@ -20,23 +21,27 @@ public sealed class QueueService : BackgroundService
private readonly TaskRunner _runner;
private readonly WorkerConfig _cfg;
private readonly ILogger<QueueService> _logger;
private readonly QueueWaker _waker;
private readonly IQueuePicker _picker;
private readonly object _lock = new();
private volatile QueueSlotState? _queueSlot;
private volatile QueueSlotState? _overrideSlot;
private readonly SemaphoreSlim _wakeSignal = new(0, 1);
public QueueService(
IDbContextFactory<ClaudeDoDbContext> dbFactory,
TaskRunner runner,
WorkerConfig cfg,
ILogger<QueueService> logger)
ILogger<QueueService> logger,
QueueWaker waker,
IQueuePicker picker)
{
_dbFactory = dbFactory;
_runner = runner;
_cfg = cfg;
_logger = logger;
_waker = waker;
_picker = picker;
}
public IReadOnlyList<(string slot, string taskId, DateTime startedAt)> GetActive()
@@ -49,13 +54,6 @@ public sealed class QueueService : BackgroundService
return list;
}
public void WakeQueue()
{
// Release if not already signalled.
try { _wakeSignal.Release(); }
catch (SemaphoreFullException) { /* already signalled */ }
}
public async Task RunNow(string taskId)
{
using (var context = _dbFactory.CreateDbContext())
@@ -147,25 +145,14 @@ public sealed class QueueService : BackgroundService
try
{
// Wait for wake signal or backstop timer.
var wakeTask = _wakeSignal.WaitAsync(stoppingToken);
var wakeTask = _waker.WaitAsync(stoppingToken);
var timerTask = timer.WaitForNextTickAsync(stoppingToken).AsTask();
await Task.WhenAny(wakeTask, timerTask);
// Drain wake signal if it fired.
if (wakeTask.IsCompletedSuccessfully)
{
// Good — signal consumed.
}
if (_queueSlot is not null) continue;
TaskEntity? task;
using (var context = _dbFactory.CreateDbContext())
{
var taskRepo = new TaskRepository(context);
task = await taskRepo.GetNextQueuedAgentTaskAsync(DateTime.UtcNow, stoppingToken);
}
var task = await _picker.ClaimNextAsync(DateTime.UtcNow, stoppingToken);
if (task is null) continue;
lock (_lock)
@@ -181,7 +168,7 @@ public sealed class QueueService : BackgroundService
_logger.LogError(t.Exception, "RunInSlotAsync failed for task {TaskId} in queue slot", task.Id);
lock (_lock) { _queueSlot = null; }
cts.Dispose();
WakeQueue(); // Check for next task immediately.
_waker.Wake(); // Check for next task immediately.
}, TaskScheduler.Default);
}
}