// diff --git a/src/lib/scrapers/idealo.ts b/src/lib/scrapers/idealo.ts
// new file mode 100644
// index 0000000..74fd253
// --- /dev/null
// +++ b/src/lib/scrapers/idealo.ts
// @@ -0,0 +1,77 @@
import * as cheerio from 'cheerio'
import type { PriceScraper, ScrapeResult } from './types'

const UA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36'

/**
 * First numeric token in a text: either digit groups separated by a single
 * (possibly non-breaking) space, e.g. "1 299,00", or digits joined by "." / ",".
 * Hoisted so the pattern is compiled once.
 */
const PRICE_TOKEN = /\d{1,3}(?:[ \u00a0\u202f\u2009]\d{3})+(?:[.,]\d+)?|\d+(?:[.,]\d+)*/

/**
 * Price scraper for idealo product pages.
 *
 * `scrape` fetches the page with browser-like headers and a 20 s timeout, then
 * tries several price sources in order (offer markup, `itemprop="price"` meta
 * tag, legacy "oopStage" selectors, JSON-LD). It never throws: HTTP errors,
 * network failures and unparseable pages are reported through the `error`
 * field of the result, with `price: null`.
 */
export const idealoScraper: PriceScraper = {
  shop: 'idealo',
  async scrape(url: string): Promise<ScrapeResult> {
    try {
      const res = await fetch(url, {
        headers: {
          'User-Agent': UA,
          'Accept': 'text/html,application/xhtml+xml',
          'Accept-Language': 'de-DE,de;q=0.9',
        },
        signal: AbortSignal.timeout(20_000),
      })
      if (!res.ok) {
        return { price: null, currency: 'EUR', availability: 'unknown', error: `HTTP ${res.status}` }
      }
      const $ = cheerio.load(await res.text())

      // Ordered from most to least preferred. Every candidate is tried until
      // one actually parses, so an element that matched but holds blank or
      // non-numeric text no longer hides the fallbacks behind it.
      const price = firstParsablePrice([
        $('[data-testid="detail-offer-price"]').first().text(),
        $('meta[itemprop="price"]').attr('content'),
        $('span.oopStage-conditionButton-price').first().text(),
        $('strong.oopStage-price').first().text(),
        $('span.oopStage-priceRangePrice').first().text(),
        $('.oop-productOfferOutbox-priceAmount').first().text(),
        extractJsonLdPrice($),
      ])

      const name = firstNonBlank([
        $('h1#oopStage-title span').first().text(),
        $('h1[data-testid="product-title"]').text(),
        $('h1').first().text(),
      ])
      const imageUrl = $('meta[property="og:image"]').attr('content') || undefined

      if (price === null) {
        return { price: null, currency: 'EUR', availability: 'unknown', name, imageUrl, error: 'price-selector-missed' }
      }
      return { price, currency: 'EUR', availability: 'in_stock', name, imageUrl }
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const message = err instanceof Error ? err.message : String(err)
      return { price: null, currency: 'EUR', availability: 'unknown', error: message }
    }
  },
}

/** Returns the first candidate that parses to a positive price, or null. */
function firstParsablePrice(candidates: ReadonlyArray<string | null | undefined>): number | null {
  for (const candidate of candidates) {
    if (!candidate) continue
    const parsed = parsePrice(candidate)
    if (parsed !== null) return parsed
  }
  return null
}

/** Returns the first candidate that is non-empty after trimming, or ''. */
function firstNonBlank(candidates: readonly string[]): string {
  for (const candidate of candidates) {
    const trimmed = candidate.trim()
    if (trimmed) return trimmed
  }
  return ''
}

/** Narrows an unknown JSON value to a plain (non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/**
 * Reads `price`, falling back to `lowPrice` then `highPrice`, from a JSON-LD
 * `offers` value, which may be a single offer object or an array of offers.
 */
function priceFromOffers(offers: unknown): string | null {
  const list: unknown[] = Array.isArray(offers) ? offers : [offers]
  for (const offer of list) {
    if (!isRecord(offer)) continue
    const price = offer.price ?? offer.lowPrice ?? offer.highPrice
    // Fixed two decimals: a bare String(12.345) would be indistinguishable
    // from a German thousands group ("12.345") once it reaches parsePrice.
    if (typeof price === 'number' && price > 0) return price.toFixed(2)
    if (typeof price === 'string' && price.trim() !== '') return price
  }
  return null
}

/**
 * Searches one parsed JSON-LD document for an offer price. Handles a single
 * node, a top-level array of nodes, and nodes wrapped in an `@graph` array.
 */
function findOfferPrice(node: unknown): string | null {
  if (Array.isArray(node)) {
    for (const item of node) {
      const found = findOfferPrice(item)
      if (found !== null) return found
    }
    return null
  }
  if (!isRecord(node)) return null
  return priceFromOffers(node.offers) ?? findOfferPrice(node['@graph'])
}

/**
 * Scans every `<script type="application/ld+json">` block and returns the
 * first offer price found, as a string suitable for `parsePrice`.
 *
 * @returns The price text, or null when no block carries a usable offer.
 */
function extractJsonLdPrice($: cheerio.CheerioAPI): string | null {
  const scripts = $('script[type="application/ld+json"]')
  for (let i = 0; i < scripts.length; i++) {
    const raw = $(scripts[i]).contents().text()
    if (!raw.trim()) continue
    let data: unknown
    try {
      data = JSON.parse(raw)
    } catch {
      // Malformed JSON-LD is common in the wild; skip this block, keep looking.
      continue
    }
    const price = findOfferPrice(data)
    if (price !== null) return price
  }
  return null
}

/**
 * Parses the first number found in `text` as a price.
 *
 * Only the first numeric token is used, so surrounding digits ("3 Angebote")
 * or the upper bound of a price range are not glued onto the price.
 * Separators are interpreted as follows:
 * - both "." and "," present: whichever comes last is the decimal separator;
 * - several identical separators: all are thousands separators;
 * - a single ",": decimal separator (German notation);
 * - a single "." followed by exactly three digits after at most three leading
 *   digits ("6.780"): thousands separator, otherwise a decimal point.
 *
 * @returns A finite price greater than zero, or null when none can be read.
 */
function parsePrice(text: string): number | null {
  const match = PRICE_TOKEN.exec(text)
  if (!match) return null
  const token = match[0].replace(/[ \u00a0\u202f\u2009]/g, '')

  const firstDot = token.indexOf('.')
  const lastDot = token.lastIndexOf('.')
  const firstComma = token.indexOf(',')
  const lastComma = token.lastIndexOf(',')

  let normalized = token
  if (lastDot !== -1 && lastComma !== -1) {
    normalized = lastDot > lastComma
      ? token.replace(/,/g, '')
      : token.replace(/\./g, '').replace(',', '.')
  } else if (lastComma !== -1) {
    normalized = firstComma === lastComma ? token.replace(',', '.') : token.replace(/,/g, '')
  } else if (lastDot !== -1) {
    const isGrouping = firstDot !== lastDot || /^\d{1,3}\.\d{3}$/.test(token)
    if (isGrouping) normalized = token.replace(/\./g, '')
  }

  const n = parseFloat(normalized)
  return Number.isFinite(n) && n > 0 ? n : null
}

// diff --git a/tests/fixtures/idealo-headphones.html b/tests/fixtures/idealo-headphones.html
// new file mode 100644
// index 0000000..0928ab6
// --- /dev/null
// +++ b/tests/fixtures/idealo-headphones.html
// @@ -0,0 +1,2611 @@
// + Mobilis Elektrorollstuhl M45 Air rot ab 6780 € | Vergleiche 1 Angebote auf idealo.de
+
+ + +
+
+
+ +
+ + +
+ + +
+ idealo app icon + +
+

idealo: Produkt Preisvergleich

+

Die besten Angebote für Deine Lieblingsprodukte

+
+ + +
+
+ +
+ + + + + + +
+ + +
+
+ + + + + + + Merkzettel +
+
+
+
+ + + + + +
+
+ + +
+ +
+ +
+ + + +
+
+ + + + +
+
+
+
+ + + +
+
+ +

+ Mobilis Elektrorollstuhl M45 Air rot +

+
+ +
+ + + +
+ +
+ +
+
+ + + +
+ + Produktdetails +
+
+
+
+ +
+ + + +
+ Preisverlauf + +
+
+
+ +
+ + + + + + +
+ + Preiswecker +
+ +
+ +
+
+ +
+
+
+ Produktübersicht: + + Produktdetails + +
+
+
+
+
+ +
+ + Ähnliche Produkte: + + + Elektrorollstuhl + + +
+
+ +
+
+ + + + +
+
+
+
+
+
+
+
+ Preisentwicklung + +
+
+
+
+ +
+
+
+
+
+
+
+ + +
+
+ +

+ Produktdetails

+
+ Mobilis Elektrorollstuhl M45 Air rot + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ Produkttyp + Elektrorollstuhl
+ Allgemeine Informationen
+ Farbe + rot
+ Besonderheiten
+ Ausstattung + Sitz
+ Eigenschaften + faltbar, höhenverstellbar
+ Weitere Eigenschaften
+ Einsatzbereich + Innenbereich
+
+ +
+
+
+
+
+
+
+

+ Preisvergleich

+
+
+
+
+
+
+
+ + +
+ + +
+
+
+ + +
+ + +
+
+
+ + +
+ +
+
+
+ Sortieren nach: + + + + +
+
+
+
+
+
+
+ +
+
+

Wie zufrieden bist Du mit dem Preisvergleich?

+

Vielen Dank für Deine Teilnahme!

+
+
+ + + + + +
+
+
+

Wie können wir den Preisvergleich für dich verbessern (optional)?

+
+ +
+ + +
+
+
+
+ +
+ +
+
+
+ +
+
+ +
+
+

Weitere Stichworte zu dem Produkt Elektrorollstuhl M45 Air rot:

+ + + höhenverstellbar + + Sitz + + Innenbereich + + Elektrorollstuhl mit Hubfunktion +
+
+
+
+ + + + +
+
+
+ + + +
+
+
+
+
+
+ + +
// \ No newline at end of file
// diff --git a/tests/scrapers/idealo.test.ts b/tests/scrapers/idealo.test.ts
// new file mode 100644
// index 0000000..de54d0b
// --- /dev/null
// +++ b/tests/scrapers/idealo.test.ts
// @@ -0,0 +1,30 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { readFileSync } from 'node:fs'
import { join } from 'node:path'
import { idealoScraper } from '@/lib/scrapers/idealo'

// NOTE(review): the fixture file is named "idealo-headphones.html", but the
// captured page title is "Mobilis Elektrorollstuhl M45 Air rot" — consider
// renaming the fixture so the name matches its content.
const fixture = readFileSync(join(__dirname, '../fixtures/idealo-headphones.html'), 'utf-8')

/**
 * Replaces the global `fetch` with a mock resolving to a minimal response
 * exposing only the members the scraper reads (`ok`, `status`, `text()`).
 * `vi.stubGlobal` records the original so `afterEach` can restore it.
 */
function stubFetch(response: { ok: boolean; status: number; body: string }): void {
  vi.stubGlobal(
    'fetch',
    vi.fn().mockResolvedValue({
      ok: response.ok,
      status: response.status,
      text: async () => response.body,
    }),
  )
}

beforeEach(() => {
  // Default: every test sees a successful response serving the fixture page.
  stubFetch({ ok: true, status: 200, body: fixture })
})

afterEach(() => {
  // Restore the real fetch so the mock cannot leak beyond a test.
  vi.unstubAllGlobals()
})

describe('idealoScraper', () => {
  it('extracts price and name', async () => {
    const r = await idealoScraper.scrape('https://www.idealo.de/foo')
    expect(r.price).toBeGreaterThan(0)
    expect(r.currency).toBe('EUR')
    expect(r.name).toBeTruthy()
  })

  it('flags cloudflare challenge', async () => {
    stubFetch({ ok: false, status: 403, body: 'Cloudflare' })
    const r = await idealoScraper.scrape('https://www.idealo.de/foo')
    expect(r.price).toBeNull()
    expect(r.error).toMatch(/403|cloudflare/i)
  })
})