feat(worker): ClaudeHistoryReader distills session logs

This commit is contained in:
mika kuns
2026-06-03 09:37:40 +02:00
parent 4cb7ad8dfa
commit bec87b3d6f
2 changed files with 209 additions and 0 deletions

View File

@@ -0,0 +1,131 @@
using System.Text.Json;
using ClaudeDo.Worker.Report.Interfaces;
namespace ClaudeDo.Worker.Report;
/// <summary>
/// Reads <c>*.jsonl</c> session logs below a projects root directory and distills them into
/// per-repository, per-day activity: the user prompts that were entered and, for every
/// session file, the text of the last assistant message that fell inside the date window.
/// </summary>
public sealed class ClaudeHistoryReader : IClaudeHistoryReader
{
    private readonly string _projectsRoot;

    /// <summary>Creates a reader over the given root directory.</summary>
    /// <param name="projectsRoot">Directory that is searched recursively for <c>*.jsonl</c> files.</param>
    public ClaudeHistoryReader(string projectsRoot) => _projectsRoot = projectsRoot;

    /// <summary>
    /// Collects prompts and summaries for every log entry whose local date lies in
    /// [<paramref name="start"/>, <paramref name="end"/>] (both inclusive).
    /// </summary>
    /// <param name="start">First local date to include.</param>
    /// <param name="end">Last local date to include.</param>
    /// <param name="excludedPrefixes">
    /// Path prefixes; entries whose <c>cwd</c> starts with one of them (after normalization,
    /// case-insensitively) are dropped. Empty entries are ignored.
    /// </param>
    /// <param name="ct">Checked once before each file is read.</param>
    /// <returns>
    /// One <see cref="RepoActivity"/> per distinct <c>cwd</c> value, ordered by repo path, each with
    /// its days ordered by date. Empty when the root directory does not exist.
    /// </returns>
    /// <remarks>
    /// The work is done synchronously; the returned task is already completed, and
    /// cancellation or I/O exceptions surface directly from this call.
    /// </remarks>
    public Task<IReadOnlyList<RepoActivity>> ReadAsync(
        DateOnly start, DateOnly end, IReadOnlyList<string> excludedPrefixes, CancellationToken ct = default)
    {
        var buckets = new Dictionary<(string Repo, DateOnly Date), DayActivity>();
        var normalizedExcludes = excludedPrefixes
            .Select(NormalizePath).Where(p => p.Length > 0).ToArray();

        if (Directory.Exists(_projectsRoot))
        {
            foreach (var file in Directory.EnumerateFiles(_projectsRoot, "*.jsonl", SearchOption.AllDirectories))
            {
                ct.ThrowIfCancellationRequested();
                ReadFile(file, start, end, normalizedExcludes, buckets);
            }
        }

        var repos = buckets
            .GroupBy(kv => kv.Key.Repo)
            .Select(g =>
            {
                var ra = new RepoActivity { RepoPath = g.Key };
                foreach (var day in g.OrderBy(kv => kv.Key.Date).Select(kv => kv.Value))
                    ra.Days.Add(day);
                return ra;
            })
            .OrderBy(r => r.RepoPath)
            .ToList();

        return Task.FromResult<IReadOnlyList<RepoActivity>>(repos);
    }

    /// <summary>
    /// Processes one log file line by line, adding user prompts to <paramref name="buckets"/> and,
    /// at the end, the last in-window assistant text of the file as a summary.
    /// Lines that are blank, not valid JSON, or not shaped as expected are skipped.
    /// </summary>
    private static void ReadFile(
        string file, DateOnly start, DateOnly end, string[] excludes,
        Dictionary<(string, DateOnly), DayActivity> buckets)
    {
        string? lastAssistantText = null;
        string? lastAssistantRepo = null;
        DateOnly lastAssistantDate = default;

        foreach (var line in File.ReadLines(file))
        {
            if (string.IsNullOrWhiteSpace(line)) continue;

            JsonDocument doc;
            try { doc = JsonDocument.Parse(line); }
            catch (JsonException) { continue; }

            using (doc)
            {
                var root = doc.RootElement;
                if (root.ValueKind != JsonValueKind.Object) continue;

                // GetStringOrNull never throws on non-string values, so a line with e.g. a numeric
                // "type" or "timestamp" is skipped like any other malformed line.
                var type = GetStringOrNull(root, "type");
                if (type is not ("user" or "assistant")) continue;

                var cwd = GetStringOrNull(root, "cwd");
                if (cwd is null || IsExcluded(cwd, excludes)) continue;

                // Timestamps are machine-written, so parse them culture-independently.
                var rawTimestamp = GetStringOrNull(root, "timestamp");
                if (!DateTimeOffset.TryParse(
                        rawTimestamp,
                        System.Globalization.CultureInfo.InvariantCulture,
                        System.Globalization.DateTimeStyles.None,
                        out var ts)) continue;

                // Bucketing uses the machine's local calendar date, not the UTC date.
                var date = DateOnly.FromDateTime(ts.LocalDateTime);
                if (date < start || date > end) continue;

                var text = ExtractText(root);
                if (string.IsNullOrWhiteSpace(text)) continue;

                if (type == "user")
                {
                    // User entries containing a system-reminder tag are treated as noise.
                    if (text.Contains("<system-reminder>", StringComparison.OrdinalIgnoreCase)) continue;
                    Bucket(buckets, cwd, date).Prompts.Add(text.Trim());
                }
                else
                {
                    // Only the final assistant message is kept; earlier ones are overwritten.
                    lastAssistantText = text.Trim();
                    lastAssistantRepo = cwd;
                    lastAssistantDate = date;
                }
            }
        }

        if (lastAssistantText is not null && lastAssistantRepo is not null)
            Bucket(buckets, lastAssistantRepo, lastAssistantDate).Summaries.Add(lastAssistantText);
    }

    /// <summary>Returns the bucket for (repo, date), creating and registering it on first use.</summary>
    private static DayActivity Bucket(
        Dictionary<(string, DateOnly), DayActivity> buckets, string repo, DateOnly date)
    {
        var key = (repo, date);
        if (!buckets.TryGetValue(key, out var day))
        {
            day = new DayActivity { Date = date };
            buckets[key] = day;
        }
        return day;
    }

    /// <summary>
    /// Returns <c>message.content</c> as text: the string itself when it is a JSON string, or the
    /// <c>text</c> values of all <c>{"type":"text"}</c> array items joined with newlines.
    /// Returns an empty string for any other shape.
    /// </summary>
    private static string ExtractText(JsonElement root)
    {
        // TryGetProperty throws on non-object elements, so verify "message" is an object first.
        if (!root.TryGetProperty("message", out var msg) ||
            msg.ValueKind != JsonValueKind.Object ||
            !msg.TryGetProperty("content", out var content)) return "";

        if (content.ValueKind == JsonValueKind.String) return content.GetString() ?? "";
        if (content.ValueKind != JsonValueKind.Array) return "";

        var parts = new List<string>();
        foreach (var item in content.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.Object) continue;
            if (GetStringOrNull(item, "type") == "text" &&
                item.TryGetProperty("text", out var txt) && txt.ValueKind == JsonValueKind.String)
                parts.Add(txt.GetString() ?? "");
        }
        return string.Join("\n", parts);
    }

    /// <summary>
    /// Returns the value of property <paramref name="name"/> when it exists and is a JSON string;
    /// otherwise <c>null</c>. <paramref name="obj"/> must be a JSON object.
    /// </summary>
    private static string? GetStringOrNull(JsonElement obj, string name) =>
        obj.TryGetProperty(name, out var el) && el.ValueKind == JsonValueKind.String
            ? el.GetString()
            : null;

    /// <summary>True when the normalized <paramref name="cwd"/> starts with any normalized exclude prefix.</summary>
    private static bool IsExcluded(string cwd, string[] excludes)
    {
        var norm = NormalizePath(cwd);
        // NOTE(review): this is a raw string-prefix test, so "c:\dev\app" also excludes
        // "c:\dev\application" — confirm that matching across directory-name boundaries is intended.
        return excludes.Any(p => norm.StartsWith(p, StringComparison.Ordinal));
    }

    /// <summary>
    /// Canonical form used for prefix comparison: backslash separators, no trailing separator,
    /// invariant lower case. A null input yields an empty string.
    /// </summary>
    private static string NormalizePath(string p) =>
        (p ?? "").Replace('/', '\\').TrimEnd('\\').ToLowerInvariant();
}

View File

@@ -0,0 +1,78 @@
using ClaudeDo.Worker.Report;
namespace ClaudeDo.Worker.Tests.Report;
/// <summary>
/// Tests for <see cref="ClaudeHistoryReader"/>. Each test writes session files into a fresh
/// temporary directory that is removed again on dispose.
/// </summary>
public class ClaudeHistoryReaderTests : IDisposable
{
    private readonly string _root;

    public ClaudeHistoryReaderTests()
    {
        _root = Path.Combine(Path.GetTempPath(), $"cdh_{Guid.NewGuid():N}");
        Directory.CreateDirectory(_root);
    }

    /// <summary>Best-effort cleanup; a directory that cannot be deleted must not fail the test run.</summary>
    public void Dispose()
    {
        try { Directory.Delete(_root, true); }
        catch (IOException) { }
        catch (UnauthorizedAccessException) { }
    }

    /// <summary>Writes <paramref name="lines"/> to <c>{root}/{projectDir}/{file}</c>.</summary>
    private void WriteSession(string projectDir, string file, params string[] lines)
    {
        var dir = Path.Combine(_root, projectDir);
        Directory.CreateDirectory(dir);
        File.WriteAllLines(Path.Combine(dir, file), lines);
    }

    /// <summary>Builds one JSONL "user" entry with a single text content item.</summary>
    private static string UserLine(string cwd, string ts, string text) =>
        $$$"""{"type":"user","cwd":{{{Json(cwd)}}},"timestamp":"{{{ts}}}","message":{"role":"user","content":[{"type":"text","text":{{{Json(text)}}}}]}}""";

    /// <summary>Builds one JSONL "assistant" entry with a single text content item.</summary>
    private static string AssistantLine(string cwd, string ts, string text) =>
        $$$"""{"type":"assistant","cwd":{{{Json(cwd)}}},"timestamp":"{{{ts}}}","message":{"role":"assistant","content":[{"type":"text","text":{{{Json(text)}}}}]}}""";

    /// <summary>Serializes a string as a quoted, escaped JSON literal.</summary>
    private static string Json(string s) => System.Text.Json.JsonSerializer.Serialize(s);

    [Fact]
    public async Task Extracts_Prompts_And_Last_Assistant_Summary_GroupedByRepoAndDay()
    {
        // The reader buckets by the machine's LOCAL date. Timestamps sit on the middle day of the
        // queried window so the entries stay in range (and on one local day) in every time zone.
        WriteSession("proj", "s1.jsonl",
            UserLine(@"C:\Dev\Repos\App", "2026-06-02T08:00:00Z", "Add login"),
            AssistantLine(@"C:\Dev\Repos\App", "2026-06-02T08:05:00Z", "first summary"),
            AssistantLine(@"C:\Dev\Repos\App", "2026-06-02T08:30:00Z", "final summary"));
        var reader = new ClaudeHistoryReader(_root);

        var result = await reader.ReadAsync(new DateOnly(2026, 6, 1), new DateOnly(2026, 6, 3),
            Array.Empty<string>());

        var repo = Assert.Single(result);
        Assert.Equal(@"C:\Dev\Repos\App", repo.RepoPath);
        var day = Assert.Single(repo.Days);
        Assert.Equal(new[] { "Add login" }, day.Prompts);
        Assert.Equal(new[] { "final summary" }, day.Summaries);
    }

    [Fact]
    public async Task Drops_Sessions_Under_Excluded_Prefix_CaseInsensitive()
    {
        WriteSession("priv", "s.jsonl",
            UserLine(@"C:\Private\Secret", "2026-06-01T08:00:00Z", "private work"));
        var reader = new ClaudeHistoryReader(_root);

        var result = await reader.ReadAsync(new DateOnly(2026, 6, 1), new DateOnly(2026, 6, 3),
            new[] { @"c:\private" });

        Assert.Empty(result);
    }

    [Fact]
    public async Task Filters_By_Date_Window_And_Skips_Noise_And_Malformed()
    {
        WriteSession("proj", "s.jsonl",
            "this is not json",
            UserLine(@"C:\Dev\App", "2026-05-01T08:00:00Z", "too old"),
            UserLine(@"C:\Dev\App", "2026-06-02T08:00:00Z", "in range"),
            UserLine(@"C:\Dev\App", "2026-06-02T09:00:00Z", "noise <system-reminder> blah"));
        var reader = new ClaudeHistoryReader(_root);

        var result = await reader.ReadAsync(new DateOnly(2026, 6, 1), new DateOnly(2026, 6, 3),
            Array.Empty<string>());

        var day = Assert.Single(Assert.Single(result).Days);
        Assert.Equal(new[] { "in range" }, day.Prompts);
    }
}