import { activityMonitor } from "./activity.js";

/** One search hit scraped from a DuckDuckGo results page. */
export interface SearchResult {
  title: string;
  url: string;
  snippet: string;
}

/** Search outcome: a numbered plain-text listing plus the structured hits. */
export interface SearchResponse {
  answer: string;
  results: SearchResult[];
}

/** Optional knobs for a search request. */
export interface SearchOptions {
  /** Maximum number of results to return (defaults to 5, capped at 20). */
  numResults?: number;
  recencyFilter?: "day" | "week" | "month" | "year";
  /** Hostnames to keep; entries prefixed with "-" are hostnames to drop. */
  domainFilter?: string[];
  signal?: AbortSignal;
}

const DDG_HTML = "https://duckduckgo.com/html/";
const DDG_LITE = "https://lite.duckduckgo.com/lite/";

const DEFAULT_NUM_RESULTS = 5;
const MAX_NUM_RESULTS = 20;

/** Matches a title anchor followed by its snippet (anchor or div variant). */
const RESULT_REGEX =
  /<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/a>|<div[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/div>)/g;

/**
 * Matches an anchor followed by a "link-text" span and a "result-snippet" span.
 * NOTE(review): the separators between the anchor and the spans are assumed to
 * be `<br>` tags — confirm against the markup actually served by the lite page.
 */
const LITE_REGEX =
  /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>\s*<br\s*\/?>\s*<span[^>]*class="link-text"[^>]*>([\s\S]*?)<\/span>\s*<br\s*\/?>\s*<span[^>]*class="result-snippet"[^>]*>([\s\S]*?)<\/span>/g;

/** Named entities decoded by {@link decodeEntities}; anything else is left as-is. */
const NAMED_ENTITIES: Record<string, string> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  nbsp: " ",
};

/**
 * Decodes numeric character references and a handful of common named
 * entities. Runs as a single pass so already-decoded output is never
 * decoded a second time (e.g. `&amp;lt;` becomes `&lt;`, not `<`).
 */
function decodeEntities(text: string): string {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (whole: string, body: string) => {
    if (body.startsWith("#")) {
      const isHex = body.charAt(1).toLowerCase() === "x";
      const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws outside the Unicode range; keep the raw text instead.
      return Number.isInteger(code) && code >= 0 && code <= 0x10ffff
        ? String.fromCodePoint(code)
        : whole;
    }
    return NAMED_ENTITIES[body.toLowerCase()] ?? whole;
  });
}

/**
 * Keeps only results whose hostname satisfies the domain filter.
 *
 * @param urls - Results to filter.
 * @param domains - Hostnames to include; entries starting with "-" are
 *   exclusions. A host matches a domain when it equals it or is a subdomain
 *   of it. Blank entries are ignored.
 * @returns The input unchanged when no filter is given; otherwise the
 *   matching subset. Results whose URL cannot be parsed are dropped.
 */
function applyDomainFilter(urls: SearchResult[], domains?: string[]): SearchResult[] {
  if (!domains || domains.length === 0) return urls;

  const includes: string[] = [];
  const excludes: string[] = [];
  for (const raw of domains) {
    const entry = raw.trim().toLowerCase();
    const isExclusion = entry.startsWith("-");
    const domain = isExclusion ? entry.slice(1) : entry;
    // An empty domain would match nothing and, as an include, reject everything.
    if (!domain) continue;
    (isExclusion ? excludes : includes).push(domain);
  }

  const hostMatches = (host: string, domain: string): boolean =>
    host === domain || host.endsWith(`.${domain}`);

  return urls.filter((r) => {
    let host: string;
    try {
      host = new URL(r.url).hostname.toLowerCase();
    } catch {
      return false;
    }
    if (includes.length > 0 && !includes.some((d) => hostMatches(host, d))) return false;
    return !excludes.some((d) => hostMatches(host, d));
  });
}

/**
 * Scrapes search results out of a results page.
 *
 * Tries the full HTML layout first and only falls back to the lite layout
 * when the first pattern yields nothing. Entries without a URL or title are
 * skipped.
 */
function extractResultsFromHtml(html: string): SearchResult[] {
  const results: SearchResult[] = [];

  for (const match of html.matchAll(RESULT_REGEX)) {
    const url = decodeUrl(decodeEntities(match[1] ?? ""));
    const title = stripTags(match[2] ?? "");
    const snippet = stripTags(match[3] || match[4] || "");
    if (!url || !title) continue;
    results.push({ title, url, snippet });
  }
  if (results.length > 0) return results;

  for (const match of html.matchAll(LITE_REGEX)) {
    const url = decodeUrl(decodeEntities(match[1] ?? ""));
    const title = stripTags(match[2] ?? "");
    // Group 3 is the "link-text" span, which is not used.
    const snippet = stripTags(match[4] || "");
    if (!url || !title) continue;
    results.push({ title, url, snippet });
  }
  return results;
}

/** Removes markup, decodes entities, collapses whitespace runs and trims. */
function stripTags(text: string): string {
  // Strip tags before decoding so an escaped "&lt;b&gt;" survives as literal text.
  return decodeEntities(text.replace(/<[^>]+>/g, ""))
    .replace(/\s+/g, " ")
    .trim();
}

/**
 * Resolves a result href to its target URL.
 *
 * When the href carries a `uddg` query parameter, that parameter's value is
 * returned; otherwise the href resolved against https://duckduckgo.com is
 * returned. If the href cannot be parsed it is returned unchanged.
 */
function decodeUrl(url: string): string {
  try {
    const parsed = new URL(url, "https://duckduckgo.com");
    // searchParams.get() already percent-decodes. Decoding again would corrupt
    // targets containing "%25" and throw on a literal "%".
    const uddg = parsed.searchParams.get("uddg");
    return uddg ? uddg : parsed.toString();
  } catch {
    return url;
  }
}

/**
 * Builds the query text, appending a ` time:<d|w|m|y>` suffix when a recency
 * filter is set.
 *
 * NOTE(review): whether DuckDuckGo honours a `time:` term in the query text
 * is not established by this file — confirm, or switch to a request parameter.
 */
function buildQuery(query: string, options: SearchOptions): string {
  if (!options.recencyFilter) return query;
  const recency: Record<NonNullable<SearchOptions["recencyFilter"]>, string> = {
    day: "d",
    week: "w",
    month: "m",
    year: "y",
  };
  return `${query} time:${recency[options.recencyFilter]}`;
}

/** Effective result cap: the requested count bounded to [0, MAX_NUM_RESULTS]. */
function resolveLimit(options: SearchOptions): number {
  const requested = options.numResults ?? DEFAULT_NUM_RESULTS;
  // A negative limit would make slice(0, limit) drop items from the end instead.
  return Math.max(0, Math.min(requested, MAX_NUM_RESULTS));
}

/** Renders results as a numbered listing and wraps them in a response. */
function toResponse(results: SearchResult[]): SearchResponse {
  const answer = results.map((r, idx) => `${idx + 1}. ${r.title}\n ${r.url}`).join("\n\n");
  return { answer, results };
}

/**
 * Fetches one results page and returns its filtered, truncated hits.
 *
 * @param endpoint - Base URL of the results page.
 * @param activityLabel - Query text recorded with the activity monitor.
 * @param errorPrefix - Prefix of the error thrown for a non-2xx response.
 * @throws Whatever `fetch` or the body read rejects with (after logging it),
 *   or an `Error` of the form `<errorPrefix> <status>` for a non-2xx response.
 */
async function fetchResults(
  endpoint: string,
  activityLabel: string,
  errorPrefix: string,
  query: string,
  options: SearchOptions,
): Promise<SearchResult[]> {
  const activityId = activityMonitor.logStart({ type: "api", query: activityLabel });
  const params = new URLSearchParams({ q: buildQuery(query, options) });

  let res: Response;
  let html = "";
  try {
    res = await fetch(`${endpoint}?${params.toString()}`, {
      headers: {
        "user-agent": "Mozilla/5.0",
        "accept-language": "en-US,en;q=0.9",
      },
      signal: options.signal,
    });
    // Read the body inside the try so an aborted or failed read is logged too.
    if (res.ok) html = await res.text();
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    activityMonitor.logError(activityId, message);
    throw err;
  }

  activityMonitor.logComplete(activityId, res.status);
  if (!res.ok) {
    throw new Error(`${errorPrefix} ${res.status}`);
  }

  const filtered = applyDomainFilter(extractResultsFromHtml(html), options.domainFilter);
  return filtered.slice(0, resolveLimit(options));
}

/**
 * Searches DuckDuckGo's HTML endpoint and falls back to the lite endpoint
 * when no results survive parsing and filtering.
 *
 * @param query - Search terms.
 * @param options - Result count, recency filter, domain filter, abort signal.
 * @returns A numbered text listing and the structured results.
 * @throws If a request fails or an endpoint answers with a non-2xx status.
 */
export async function searchWithDuckDuckGo(
  query: string,
  options: SearchOptions = {},
): Promise<SearchResponse> {
  const results = await fetchResults(DDG_HTML, query, "DuckDuckGo error", query, options);
  if (results.length === 0) {
    // fallback to lite
    return searchWithDuckDuckGoLite(query, options);
  }
  return toResponse(results);
}

/**
 * Same search against the lite endpoint; returns whatever it finds,
 * including an empty result set.
 */
async function searchWithDuckDuckGoLite(
  query: string,
  options: SearchOptions,
): Promise<SearchResponse> {
  const results = await fetchResults(
    DDG_LITE,
    `${query} (lite)`,
    "DuckDuckGo lite error",
    query,
    options,
  );
  return toResponse(results);
}