fix(worker): clean up orphaned worktree when the DB row insert fails
If WorktreeAddAsync succeeds but the worktrees-row insert throws, the worktree was left on disk and branch undeleted with nothing tracking it. Wrap the insert in try/catch and best-effort remove the worktree+branch (non-cancellable) before rethrowing. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
13
docs/open.md
13
docs/open.md
@@ -31,14 +31,14 @@ Weitere Specs/Plans seit 2026-04-30 (für die Historie): `planning-draft-planned
|
|||||||
|
|
||||||
Der Code ist da; was fehlt, ist die **manuelle Bestätigung mit explizit notiertem Pass-Kriterium**. Ohne falsifizierbare Observable produziert ein Manual-Run nur „sah ok aus".
|
Der Code ist da; was fehlt, ist die **manuelle Bestätigung mit explizit notiertem Pass-Kriterium**. Ohne falsifizierbare Observable produziert ein Manual-Run nur „sah ok aus".
|
||||||
|
|
||||||
### 1.0 Plan-Verification — Kernpipeline (unverändert dringend)
|
### 1.0 Plan-Verification — Kernpipeline
|
||||||
|
|
||||||
Die End-to-End-Beweise für die eigentliche Ausführungspipeline wurden **nie durchgespielt**. Das ist das größte verbleibende Risiko: alle neuen Features sitzen auf einer Pipeline, deren Happy-Path nie unter echtem `claude` verifiziert wurde.
|
**2026-06-04: Kernpipeline läuft.** Der User bestätigt, dass Tasks mit echtem `claude` gestartet werden und der Happy-Path (non-worktree) durchläuft — Live-Stream und Wake-up inklusive. Die granularen Observables unten wurden nicht formell protokolliert; offen bleiben die worktree-spezifischen Pfade (5–7).
|
||||||
|
|
||||||
| # | Item | Status | Pass-Kriterium |
|
| # | Item | Status | Pass-Kriterium |
|
||||||
|---|------|---|---|
|
|---|------|---|---|
|
||||||
| 3 | Smoke-Spawn (`claude -p` „ping") | ⬜ | `task_runs`-Row mit `session_id NOT NULL`, `result` non-empty, `output_tokens > 0` |
|
| 3 | Smoke-Spawn (`claude -p` „ping") | ✅ | (in der Praxis bestätigt 2026-06-04; formale Observables nicht protokolliert) |
|
||||||
| 4 | E2E Happy Path (Non-Worktree) | ⬜ | Liste anlegen → Task → `tasks.status='Done'`, `result IS NOT NULL`, Logfile unter `~/.todo-app/logs/<taskId>.ndjson`, UI-Row mit Done-Badge |
|
| 4 | E2E Happy Path (Non-Worktree) | ✅ | (in der Praxis bestätigt 2026-06-04) |
|
||||||
| 5 | Worktree Happy Path | ⬜ | Liste mit `working_dir` auf temp-Repo, Task mit Codeänderung → `worktrees.state='active'`, `head_commit IS NOT NULL`, `diff_stat` non-empty, Branch `claudedo/<id>` auf Disk |
|
| 5 | Worktree Happy Path | ⬜ | Liste mit `working_dir` auf temp-Repo, Task mit Codeänderung → `worktrees.state='active'`, `head_commit IS NOT NULL`, `diff_stat` non-empty, Branch `claudedo/<id>` auf Disk |
|
||||||
| 6 | No-Changes-Run | ⬜ | Prompt der nichts ändert → `status='Done'`, `worktrees.head_commit IS NULL`, `diff_stat IS NULL` |
|
| 6 | No-Changes-Run | ⬜ | Prompt der nichts ändert → `status='Done'`, `worktrees.head_commit IS NULL`, `diff_stat IS NULL` |
|
||||||
| 7 | Kein Git-Repo | ⬜ | `working_dir=C:\Temp` → `status='Failed'`, **keine** `worktrees`-Row, Git-Fehler im Log |
|
| 7 | Kein Git-Repo | ⬜ | `working_dir=C:\Temp` → `status='Failed'`, **keine** `worktrees`-Row, Git-Fehler im Log |
|
||||||
@@ -111,10 +111,9 @@ Voraussetzung: Gitea-Release unter `git.kuns.dev/releases/ClaudeDo` mit `ClaudeD
|
|||||||
|
|
||||||
## 3. Worker-Robustheit
|
## 3. Worker-Robustheit
|
||||||
|
|
||||||
### 3.1 Worktree-Cleanup bei Anlage-Failed 🟡
|
### 3.1 Worktree-Cleanup bei Anlage-Failed ✅
|
||||||
- `WorktreeManager.cs:64-92` heilt eine „branch already exists"-Kollision (remove + prune + delete + retry).
|
- `WorktreeManager.cs:64-92` heilt eine „branch already exists"-Kollision (remove + prune + delete + retry).
|
||||||
- **Offen:** Es gibt **kein** `try/finally`, das `git worktree remove --force` ausführt, wenn `WorktreeAddAsync` erfolgreich war, aber der anschließende DB-Insert (`:95-109`) wirft. Genau dieser Crash-Pfad ist weiterhin ungeschützt.
|
- Der DB-Insert (`:96-122`) ist jetzt in `try/catch` gekapselt: schlägt er fehl, nachdem `WorktreeAddAsync` erfolgreich war, wird der verwaiste Worktree + Branch best-effort (non-cancellable) entfernt und die Exception rethrown. Test: `WorktreeManagerTests.CreateAsync_DbInsertFails_RemovesOrphanedWorktreeAndBranch`.
|
||||||
- **Aufwand:** klein.
|
|
||||||
|
|
||||||
### 3.2 CLI-Preflight beim Worker-Start ✅
|
### 3.2 CLI-Preflight beim Worker-Start ✅
|
||||||
- `Worker/Lifecycle/ClaudeCliPreflight.cs` + Wiring in `Program.cs`; skippable via `CLAUDEDO_SKIP_CLI_PREFLIGHT=1`; Tests vorhanden.
|
- `Worker/Lifecycle/ClaudeCliPreflight.cs` + Wiring in `Program.cs`; skippable via `CLAUDEDO_SKIP_CLI_PREFLIGHT=1`; Tests vorhanden.
|
||||||
|
|||||||
@@ -92,21 +92,37 @@ public sealed class WorktreeManager
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Insert worktrees row AFTER git succeeds — if git throws, no row is created.
|
// Insert worktrees row AFTER git succeeds — if git throws, no row is created.
|
||||||
using var context = _dbFactory.CreateDbContext();
|
// If the insert itself fails, the worktree is already on disk with nothing
|
||||||
var wtRepo = new WorktreeRepository(context);
|
// tracking it: remove it (best-effort, non-cancellable) before rethrowing.
|
||||||
// Drop any stale row from a prior run (force-remove may have left the DB side behind).
|
try
|
||||||
await wtRepo.DeleteAsync(task.Id, ct);
|
|
||||||
await wtRepo.AddAsync(new WorktreeEntity
|
|
||||||
{
|
{
|
||||||
TaskId = task.Id,
|
using var context = _dbFactory.CreateDbContext();
|
||||||
Path = worktreePath,
|
var wtRepo = new WorktreeRepository(context);
|
||||||
BranchName = branchName,
|
// Drop any stale row from a prior run (force-remove may have left the DB side behind).
|
||||||
BaseCommit = baseCommit,
|
await wtRepo.DeleteAsync(task.Id, ct);
|
||||||
HeadCommit = null,
|
await wtRepo.AddAsync(new WorktreeEntity
|
||||||
DiffStat = null,
|
{
|
||||||
State = WorktreeState.Active,
|
TaskId = task.Id,
|
||||||
CreatedAt = DateTime.UtcNow,
|
Path = worktreePath,
|
||||||
}, ct);
|
BranchName = branchName,
|
||||||
|
BaseCommit = baseCommit,
|
||||||
|
HeadCommit = null,
|
||||||
|
DiffStat = null,
|
||||||
|
State = WorktreeState.Active,
|
||||||
|
CreatedAt = DateTime.UtcNow,
|
||||||
|
}, ct);
|
||||||
|
}
|
||||||
|
catch (Exception dbEx)
|
||||||
|
{
|
||||||
|
_logger.LogError(dbEx,
|
||||||
|
"Failed to record worktree row for task {TaskId}; removing orphaned worktree at {Path}",
|
||||||
|
task.Id, worktreePath);
|
||||||
|
try { await _git.WorktreeRemoveAsync(workingDir, worktreePath, force: true, CancellationToken.None); }
|
||||||
|
catch (Exception rmEx) { _logger.LogWarning(rmEx, "Failed to remove orphaned worktree at {Path}", worktreePath); }
|
||||||
|
try { await _git.BranchDeleteAsync(workingDir, branchName, force: true, CancellationToken.None); }
|
||||||
|
catch (Exception delEx) { _logger.LogWarning(delEx, "Failed to delete orphaned branch {Branch}", branchName); }
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
_logger.LogInformation("Created worktree for task {TaskId} at {Path} (branch {Branch}, base {Base})",
|
_logger.LogInformation("Created worktree for task {TaskId} at {Path} (branch {Branch}, base {Base})",
|
||||||
task.Id, worktreePath, branchName, baseCommit);
|
task.Id, worktreePath, branchName, baseCommit);
|
||||||
|
|||||||
@@ -158,6 +158,37 @@ public class WorktreeManagerTests : IDisposable
|
|||||||
Assert.Null(row);
|
Assert.Null(row);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task CreateAsync_DbInsertFails_RemovesOrphanedWorktreeAndBranch()
|
||||||
|
{
|
||||||
|
if (!GitAvailable) { Assert.True(true, "git not available -- skipping"); return; }
|
||||||
|
|
||||||
|
var repo = CreateRepo();
|
||||||
|
var (task, list) = MakeEntities(repo.RepoDir);
|
||||||
|
|
||||||
|
// Seed the list but NOT the task: the worktrees-row insert references
|
||||||
|
// tasks(task_id) and fails the FK after `git worktree add` has succeeded.
|
||||||
|
var db = new DbFixture();
|
||||||
|
_dbFixtures.Add(db);
|
||||||
|
using (var seedCtx = db.CreateContext())
|
||||||
|
await new ListRepository(seedCtx).AddAsync(list);
|
||||||
|
|
||||||
|
var cfg = new WorkerConfig { WorktreeRootStrategy = "sibling" };
|
||||||
|
var mgr = new WorktreeManager(
|
||||||
|
new GitService(), db.CreateFactory(), cfg, NullLogger<WorktreeManager>.Instance);
|
||||||
|
|
||||||
|
await Assert.ThrowsAnyAsync<Exception>(
|
||||||
|
() => mgr.CreateAsync(task, list, CancellationToken.None));
|
||||||
|
|
||||||
|
var branchName = $"claudedo/{task.Id.Replace("-", "")}";
|
||||||
|
var branchList = GitRepoFixture.RunGit(repo.RepoDir, "branch", "--list", branchName);
|
||||||
|
Assert.True(string.IsNullOrWhiteSpace(branchList),
|
||||||
|
$"orphaned branch {branchName} should be cleaned up, got: {branchList}");
|
||||||
|
|
||||||
|
var worktreeList = GitRepoFixture.RunGit(repo.RepoDir, "worktree", "list");
|
||||||
|
Assert.DoesNotContain(task.Id, worktreeList);
|
||||||
|
}
|
||||||
|
|
||||||
private static (TaskEntity task, ListEntity list) MakeEntities(string workingDir)
|
private static (TaskEntity task, ListEntity list) MakeEntities(string workingDir)
|
||||||
{
|
{
|
||||||
var listId = Guid.NewGuid().ToString();
|
var listId = Guid.NewGuid().ToString();
|
||||||
|
|||||||
Reference in New Issue
Block a user