fix: Truth-Archiv nutzt Cursor-Pagination (?page=N wird vom Server ignoriert)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -47,11 +47,14 @@ for (let from = fromBlock; from <= head; from += STEP) {
|
|||||||
console.log(`On-chain: ${onchainCount} Events`);
|
console.log(`On-chain: ${onchainCount} Events`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Truth (best effort, letzte N Archiv-Seiten à 10 Posts) ──
|
// ── Truth (best effort, Cursor-Pagination rückwärts bis TRUTH_CUTOFF) ──
|
||||||
// Markup verifiziert 2026-06-12: Seite hat 10 Posts, je ein <div class="status"> (exakt).
|
// Markup verifiziert 2026-06-12: je Post ein <div class="status"> (exakt).
|
||||||
// Timestamp als menschenlesbarer Text in class="status-info__meta-item" (kein datetime=-Attribut!).
|
// Timestamp als menschenlesbarer Text in class="status-info__meta-item" (kein datetime=-Attribut!).
|
||||||
// Post-Text in class="status__content". URL via href="https://trumpstruth.org/statuses/\d+".
|
// Post-Text in class="status__content". URL via href="https://trumpstruth.org/statuses/\d+".
|
||||||
|
// ACHTUNG: ?page=N wird vom Server IGNORIERT (liefert immer Seite 1) — Pagination ist
|
||||||
|
// Cursor-basiert: ?cursor=<base64 {status_created_at, _pointsToNextItems}> aus dem "older"-Link.
|
||||||
const pages = Number(args['truth-pages']);
|
const pages = Number(args['truth-pages']);
|
||||||
|
const TRUTH_CUTOFF = Date.parse('2024-09-01T00:00:00Z'); // Beginn der WLFI-Ära, älter brauchen wir nicht
|
||||||
const candidates: { symbol: string; ts: number; url: string }[] = [];
|
const candidates: { symbol: string; ts: number; url: string }[] = [];
|
||||||
let oldestTs = Infinity;
|
let oldestTs = Infinity;
|
||||||
|
|
||||||
@@ -72,21 +75,38 @@ function parseEasternTime(s: string): number {
|
|||||||
return utcGuess - offsetH * 3600_000; // ET 9:49 PM = UTC 9:49 PM − (−4h) = 01:49 nächster Tag
|
return utcGuess - offsetH * 3600_000; // ET 9:49 PM = UTC 9:49 PM − (−4h) = 01:49 nächster Tag
|
||||||
}
|
}
|
||||||
/** Fetches archive page `p` with up to 3 attempts (20 s timeout each); resolves to the HTML, or to null once every attempt has failed — timeouts/network errors must not crash the scan. */
|
/** Fetches `url` with up to 3 attempts (20 s timeout each); resolves to the HTML, or to null once every attempt has failed — timeouts/network errors must not crash the scan. */
|
||||||
async function fetchPage(p: number): Promise<string | null> {
|
async function fetchPage(url: string): Promise<string | null> {
|
||||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`https://trumpstruth.org/?page=${p}`, { signal: AbortSignal.timeout(20_000) });
|
const res = await fetch(url, { signal: AbortSignal.timeout(20_000) });
|
||||||
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
if (!res.ok) throw new Error(`HTTP ${res.status}`);
|
||||||
return await res.text();
|
return await res.text();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.warn(`Seite ${p}, Versuch ${attempt}/3 fehlgeschlagen:`, err instanceof Error ? err.message : err);
|
console.warn(`${url}, Versuch ${attempt}/3 fehlgeschlagen:`, err instanceof Error ? err.message : err);
|
||||||
await Bun.sleep(2000 * attempt); // waits 2 s, 4 s, 6 s after the 1st, 2nd, 3rd failure (also after the last one)
|
await Bun.sleep(2000 * attempt); // waits 2 s, 4 s, 6 s after the 1st, 2nd, 3rd failure (also after the last one)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the "older" pagination cursor from the HTML of one archive page.
 *
 * Every `cursor=<value>` occurrence is URL-decoded and read as base64-encoded JSON. Among the
 * payloads whose `_pointsToNextItems` is `true`, the one with the smallest `status_created_at`
 * (plain string comparison) wins.
 *
 * Both standard base64 and the URL-safe variant (`-`/`_`, padding optional) are accepted.
 * Values that cannot be decoded — malformed %-escape, invalid base64, non-JSON payload, or a
 * payload without a string `status_created_at` — are skipped instead of aborting the scan.
 *
 * @param html Raw HTML of an archive page.
 * @returns The winning cursor, URL-encoded and ready to append after `cursor=`, or `null`
 *          when the page contains no usable "older" cursor.
 */
function nextCursor(html: string): string | null {
  const prefixLength = 'cursor='.length;
  let best: { ts: string; cursor: string } | null = null;
  for (const m of html.match(/cursor=([A-Za-z0-9+/=%_-]+)/g) ?? []) {
    try {
      // decodeURIComponent throws URIError on a malformed %-escape, so it must live inside the try.
      const raw = decodeURIComponent(m.slice(prefixLength));
      // Normalise URL-safe base64 to the standard alphabet and restore padding before atob.
      const standard = raw.replace(/-/g, '+').replace(/_/g, '/');
      const padded = standard.padEnd(Math.ceil(standard.length / 4) * 4, '=');
      const payload: unknown = JSON.parse(atob(padded));
      if (typeof payload !== 'object' || payload === null) continue;
      const { _pointsToNextItems: pointsToNext, status_created_at: createdAt } = payload as Record<string, unknown>;
      if (pointsToNext !== true || typeof createdAt !== 'string') continue;
      if (best === null || createdAt < best.ts) {
        best = { ts: createdAt, cursor: raw };
      }
    } catch {
      // Not a pagination cursor we can read — ignore this match and keep looking.
    }
  }
  return best ? encodeURIComponent(best.cursor) : null;
}
|
||||||
|
|
||||||
|
let pageUrl = 'https://trumpstruth.org/?sort=desc&per_page=50';
|
||||||
for (let p = 1; p <= pages; p++) {
|
for (let p = 1; p <= pages; p++) {
|
||||||
const html = await fetchPage(p);
|
const html = await fetchPage(pageUrl);
|
||||||
if (html === null) { console.warn(`Seite ${p}: dauerhaft nicht erreichbar — Truth-Scan endet hier (best effort)`); break; }
|
if (html === null) { console.warn(`Seite ${p}: dauerhaft nicht erreichbar — Truth-Scan endet hier (best effort)`); break; }
|
||||||
// Split auf exaktes class="status" (nicht \b) — gibt genau 10 Post-Blöcke je Seite.
|
// Split auf exaktes class="status" (nicht \b) — gibt genau einen Block je Post (bisher 10 je Seite; mit per_page=50 ggf. bis zu 50).
|
||||||
// Vorsicht: class="status\b würde auch status__header, status__body etc. treffen (138 statt 10).
|
// Vorsicht: class="status\b würde auch status__header, status__body etc. treffen (138 statt 10).
|
||||||
@@ -105,6 +125,11 @@ for (let p = 1; p <= pages; p++) {
|
|||||||
const text = body.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
|
const text = body.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
|
||||||
for (const symbol of matchCoins(text)) candidates.push({ symbol, ts, url });
|
for (const symbol of matchCoins(text)) candidates.push({ symbol, ts, url });
|
||||||
}
|
}
|
||||||
|
if (p % 20 === 0) console.log(` … Seite ${p}, ältester Post bisher ${new Date(oldestTs).toISOString()}, ${candidates.length} Kandidaten`);
|
||||||
|
if (oldestTs < TRUTH_CUTOFF) { console.log(`Cutoff ${new Date(TRUTH_CUTOFF).toISOString()} erreicht — Scan komplett für die WLFI-Ära`); break; }
|
||||||
|
const cursor = nextCursor(html);
|
||||||
|
if (!cursor) { console.warn(`Seite ${p}: kein Older-Cursor — Archiv-Ende erreicht`); break; }
|
||||||
|
pageUrl = `https://trumpstruth.org/?sort=desc&per_page=50&cursor=${cursor}`;
|
||||||
await Bun.sleep(300);
|
await Bun.sleep(300);
|
||||||
}
|
}
|
||||||
console.log(`Truth-Scan: ${candidates.length} Kandidaten, ältester Post ${oldestTs === Infinity ? '—' : new Date(oldestTs).toISOString()}`);
|
console.log(`Truth-Scan: ${candidates.length} Kandidaten, ältester Post ${oldestTs === Infinity ? '—' : new Date(oldestTs).toISOString()}`);
|
||||||
|
|||||||
Reference in New Issue
Block a user