feat: Truth-Social-RSS-Parser + Coin-Keyword-Matching

This commit is contained in:
2026-06-12 08:05:06 +00:00
parent 917bcad8c3
commit 71d07659e3
2 changed files with 73 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
import { describe, expect, test } from 'bun:test';
import { matchCoins, parseTruthFeed } from './truth';
// Minimal RSS fixture with two items. The first wraps HTML (<p>…</p>) in CDATA and
// mentions Bitcoin/BTC; the second is plain CDATA text without any coin mention.
const XML = `<?xml version="1.0"?><rss><channel>
<item><link>https://trumpstruth.org/statuses/1</link><pubDate>Fri, 12 Jun 2026 01:49:56 +0000</pubDate><description><![CDATA[<p>Bitcoin is going to the MOON. Buy BTC!</p>]]></description></item>
<item><link>https://trumpstruth.org/statuses/2</link><pubDate>Thu, 11 Jun 2026 09:00:00 +0000</pubDate><description><![CDATA[Crooked media!]]></description></item>
</channel></rss>`;
// Verifies that parseTruthFeed turns the XML fixture into structured posts.
describe('parseTruthFeed', () => {
// Test title (German): "extracts URL, timestamp, plain text (tags removed)".
test('extrahiert URL, Timestamp, Klartext (Tags entfernt)', () => {
const posts = parseTruthFeed(XML);
// Both fixture items carry a link and a parseable date, so both must be returned.
expect(posts).toHaveLength(2);
expect(posts[0].url).toBe('https://trumpstruth.org/statuses/1');
// The timestamp is expected to equal Date.parse of the raw pubDate string.
expect(posts[0].ts).toBe(Date.parse('Fri, 12 Jun 2026 01:49:56 +0000'));
expect(posts[0].text).toContain('Bitcoin is going to the MOON');
// Markup from the CDATA payload must not leak into the plain text.
expect(posts[0].text).not.toContain('<p>');
});
});
// Verifies the keyword matching rules of matchCoins against the watchlist.
describe('matchCoins', () => {
// Test title (German): "name case-insensitive, ticker only in exact upper case".
test('Name case-insensitive, Ticker nur exakt groß', () => {
// Results are expected in watchlist order, regardless of casing of the coin names.
expect(matchCoins('I love BITCOIN and solana')).toEqual(['BTC', 'SOL']);
expect(matchCoins('Buy ETH now')).toEqual(['ETH']);
expect(matchCoins('the ethics committee')).toEqual([]); // lowercase 'eth' inside a longer word must not match
expect(matchCoins('Das sei seitwärts')).toEqual([]); // 'SEI' counts only when written in upper case
expect(matchCoins('THE ARENA IS PACKED')).toEqual([]); // 'ENA' counts only at word boundaries
expect(matchCoins('Tron will be huge')).toEqual(['TRX']); // not tradable, but still an event
});
// Test title (German): "deduplicates multiple mentions within the same text".
test('dedupliziert Mehrfach-Erwähnungen im selben Text', () => {
// Ticker twice plus the name once must still yield a single symbol.
expect(matchCoins('BTC BTC Bitcoin')).toEqual(['BTC']);
});
});

View File

@@ -0,0 +1,41 @@
import { COIN_KEYWORDS } from './watchlist';
/** A single post extracted from the RSS feed by parseTruthFeed. */
export interface TruthPost {
/** Trimmed content of the item's <link> element. */
url: string;
/** Publication time: the result of Date.parse on the item's <pubDate>. */
ts: number; // Unix ms
/** Item description as plain text: CDATA wrapper and tags removed, whitespace collapsed. */
text: string;
}
/** Backslash-escapes every regex metacharacter in `s` so it can be embedded literally in a RegExp source. */
const esc = (s: string): string =>
  s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
/** Named entities decoded in post text; unknown names are left untouched. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', '\u00a0'],
]);

/**
 * Decodes named and numeric character references in a single pass.
 *
 * A single pass matters: chained per-entity replaces would decode their own
 * output again, turning the literal text `&amp;lt;` into `<` instead of `&lt;`.
 * References that are unknown or out of the Unicode range are kept verbatim.
 */
function decodeEntities(s: string): string {
  return s.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g,
    (whole: string, ref: string): string => {
      if (!ref.startsWith('#')) return NAMED_ENTITIES.get(ref) ?? whole;
      const isHex = ref[1] === 'x' || ref[1] === 'X';
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws a RangeError above U+10FFFF, so guard the range.
      return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    },
  );
}

/**
 * Extracts posts from an RSS document using regular expressions (no XML parser;
 * the feed is expected to be flat, with plain `<item>`, `<link>`, `<pubDate>`
 * and `<description>` elements).
 *
 * Items without a link, without a pubDate, or with a pubDate that `Date.parse`
 * cannot read are skipped. A missing description yields an empty `text`.
 *
 * @param xml Raw RSS document.
 * @returns One entry per usable item, in document order.
 */
export function parseTruthFeed(xml: string): TruthPost[] {
  const posts: TruthPost[] = [];
  for (const item of xml.match(/<item>[\s\S]*?<\/item>/g) ?? []) {
    const url = item.match(/<link>([^<]+)<\/link>/)?.[1]?.trim();
    const pubDate = item.match(/<pubDate>([^<]+)<\/pubDate>/)?.[1];
    const descRaw = item.match(/<description>([\s\S]*?)<\/description>/)?.[1] ?? '';
    if (!url || !pubDate) continue;
    const ts = Date.parse(pubDate);
    if (Number.isNaN(ts)) continue;
    const markup = descRaw
      // Trim first: the CDATA markers are anchored, and pretty-printed feeds pad
      // the description with whitespace, which would otherwise leave "]]>" behind.
      .trim()
      .replace(/^<!\[CDATA\[/, '')
      .replace(/\]\]>$/, '')
      // Tags are removed before entities are decoded so that an escaped "&lt;b&gt;"
      // in the post survives as visible text instead of being stripped as a tag.
      .replace(/<[^>]+>/g, ' ');
    const text = decodeEntities(markup).replace(/\s+/g, ' ').trim();
    posts.push({ url, ts, text });
  }
  return posts;
}
/**
 * Returns the symbols of the coins mentioned in `text`.
 *
 * Entries are checked in COIN_KEYWORDS order and each matching entry contributes
 * its symbol once, however often it is mentioned. Names match case-insensitively,
 * tickers only in their exact casing; both must sit between word boundaries.
 */
export function matchCoins(text: string): string[] {
  // Whole-word search for one keyword; `flags` is 'i' for names and omitted for tickers.
  const mentions = (keyword: string, flags?: string): boolean =>
    new RegExp(`\\b${esc(keyword)}\\b`, flags).test(text);

  const symbols: string[] = [];
  for (const { names, tickers, symbol } of COIN_KEYWORDS) {
    const mentioned =
      names.some((name) => mentions(name, 'i')) ||
      tickers.some((ticker) => mentions(ticker)); // case-sensitive
    if (mentioned) {
      symbols.push(symbol);
    }
  }
  return symbols;
}