// 188 lines · 5.3 KiB · TypeScript
import { activityMonitor } from "./activity.js";
|
|
|
|
/** A single hit parsed out of a DuckDuckGo results page. */
export interface SearchResult {
  /** Link text of the hit, with markup removed. */
  title: string;
  /** Address the hit points to. */
  url: string;
  /** Excerpt captured alongside the hit; empty when none was captured. */
  snippet: string;
}
|
|
|
|
/** What a search call resolves to: a printable listing plus the structured hits. */
export interface SearchResponse {
  /** Numbered, human-readable listing built from `results`. */
  answer: string;
  /** The hits that `answer` was built from, in the same order. */
  results: SearchResult[];
}
|
|
|
|
/** Optional settings accepted by the search functions. */
export interface SearchOptions {
  /** Maximum number of hits to return; the search functions default to 5 and cap at 20. */
  numResults?: number;
  /** Recency window; translated into a `time:` term appended to the query text. */
  recencyFilter?: "day" | "week" | "month" | "year";
  /** Host names to keep; an entry starting with "-" names a host to drop instead. */
  domainFilter?: string[];
  /** Signal handed to `fetch` so an in-flight request can be aborted. */
  signal?: AbortSignal;
}
|
|
|
|
/** Endpoint of DuckDuckGo's HTML results page (queried first). */
const DDG_HTML = "https://duckduckgo.com/html/";

/** Endpoint of DuckDuckGo's lite results page (queried as the fallback). */
const DDG_LITE = "https://lite.duckduckgo.com/lite/";
|
|
|
|
/**
 * Keeps only the results whose host passes the domain filter.
 *
 * Each filter entry is a host name; a leading "-" marks it as an exclusion.
 * A host matches an entry when it equals the entry or is a subdomain of it.
 * Entries are trimmed and lower-cased before use and blank entries are
 * ignored, so stray whitespace or an empty string cannot silently discard
 * every result.
 *
 * @param urls - Results to filter.
 * @param domains - Filter entries. When absent, empty, or made up solely of
 *   blank entries, `urls` is returned unchanged.
 * @returns The results that satisfy the filter, in their original order.
 *   While a filter is active, results whose `url` cannot be parsed are dropped.
 */
function applyDomainFilter(urls: SearchResult[], domains?: string[]): SearchResult[] {
  if (!domains || domains.length === 0) return urls;

  const includes: string[] = [];
  const excludes: string[] = [];
  for (const raw of domains) {
    const entry = raw.trim().toLowerCase();
    const isExclusion = entry.startsWith("-");
    const domain = (isExclusion ? entry.slice(1) : entry).trim();
    if (domain === "") continue; // blank entries carry no constraint
    (isExclusion ? excludes : includes).push(domain);
  }

  // Nothing usable was supplied: behave as if no filter had been given.
  if (includes.length === 0 && excludes.length === 0) return urls;

  const matches = (host: string, domain: string): boolean =>
    host === domain || host.endsWith(`.${domain}`);

  return urls.filter((result) => {
    let host: string;
    try {
      host = new URL(result.url).hostname.toLowerCase();
    } catch {
      // Without a parsable host the filter cannot be evaluated.
      return false;
    }
    if (includes.length > 0 && !includes.some((d) => matches(host, d))) return false;
    return !excludes.some((d) => matches(host, d));
  });
}
|
|
|
|
/**
 * Scrapes search hits out of the markup of a DuckDuckGo results page.
 *
 * The pattern for the regular HTML layout is tried first; only when it finds
 * nothing is the pattern for the lite layout run over the same text. A hit
 * whose URL or title ends up empty after cleanup is discarded.
 *
 * @param html - Raw page markup.
 * @returns The hits in document order; empty when neither pattern matches.
 */
function extractResultsFromHtml(html: string): SearchResult[] {
  // Regular layout: the anchor with class "result__a" supplies href (group 1)
  // and title (group 2); the snippet follows as either an <a> (group 3) or a
  // <div> (group 4) with class "result__snippet".
  const htmlPattern = /<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/a>|<div[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/div>)/g;

  // Lite layout: href (group 1), title (group 2), a "link-text" span (group 3,
  // matched but unused) and a "result-snippet" span (group 4).
  const litePattern = /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>\s*<br\s*\/?>\s*<span[^>]*class="link-text"[^>]*>([\s\S]*?)<\/span>\s*<br\s*\/?>\s*<span[^>]*class="result-snippet"[^>]*>([\s\S]*?)<\/span>/g;

  // Runs one pattern over the page and converts every match into a hit.
  const collect = (
    pattern: RegExp,
    snippetOf: (m: RegExpMatchArray) => string,
  ): SearchResult[] => {
    const found: SearchResult[] = [];
    for (const m of html.matchAll(pattern)) {
      const url = decodeUrl(m[1]);
      const title = stripTags(m[2]);
      const snippet = stripTags(snippetOf(m));
      if (url && title) found.push({ title, url, snippet });
    }
    return found;
  };

  const primary = collect(htmlPattern, (m) => m[3] || m[4] || "");
  if (primary.length > 0) return primary;

  return collect(litePattern, (m) => m[4] || "");
}
|
|
|
|
/** Named character references that show up in result titles and snippets. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/**
 * Turns an HTML fragment into plain text.
 *
 * Tags are removed, character references (`&amp;`, `&#39;`, `&#x27;`, …) are
 * decoded, and runs of whitespace are collapsed to single spaces. References
 * that are unknown or out of range are left exactly as written.
 *
 * @param text - HTML fragment, e.g. the inner markup of a result link.
 * @returns The trimmed plain-text content.
 */
function stripTags(text: string): string {
  return text
    // Tags go first so an escaped "&lt;b&gt;" survives as the literal text "<b>".
    .replace(/<[^>]+>/g, "")
    // One pass over all references: "&amp;lt;" becomes "&lt;", never "<".
    .replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, (whole: string, ref: string) => {
      if (ref.startsWith("#")) {
        const isHex = ref[1] === "x" || ref[1] === "X";
        const code = isHex ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
        return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
      }
      return NAMED_ENTITIES.get(ref) ?? whole;
    })
    .replace(/\s+/g, " ")
    .trim();
}
|
|
|
|
/**
 * Resolves the `href` of a result link to the address it really points to.
 *
 * Relative and protocol-relative links are resolved against
 * `https://duckduckgo.com`. When the link carries a `uddg` query parameter,
 * that parameter's value is returned as the target; otherwise the resolved
 * link itself is returned.
 *
 * @param url - The `href` attribute value as it appears in the page markup.
 * @returns The target URL, or `url` unchanged when it cannot be parsed.
 */
function decodeUrl(url: string): string {
  try {
    // The value is lifted straight from an HTML attribute, where "&"
    // separators are written as "&amp;".
    const href = url.replace(/&amp;/g, "&");
    const parsed = new URL(href, "https://duckduckgo.com");
    const target = parsed.searchParams.get("uddg");
    // searchParams.get() already percent-decodes. Decoding again would corrupt
    // targets containing escapes such as "%2520" and throw on a literal "%".
    return target ? target : parsed.toString();
  } catch {
    return url;
  }
}
|
|
|
|
/**
 * Produces the query text that is sent as the `q` parameter.
 *
 * Without a recency filter the query is passed through untouched. With one,
 * a ` time:<code>` term is appended, where the code is `d`, `w`, `m` or `y`
 * for day, week, month and year.
 *
 * NOTE(review): confirm that DuckDuckGo honours a `time:` term inside the
 * query text.
 *
 * @param query - The user's search terms.
 * @param options - Search options; only `recencyFilter` is consulted here.
 * @returns The query text, possibly extended with the recency term.
 */
function buildQuery(query: string, options: SearchOptions): string {
  const codeFor: Record<string, string> = { day: "d", week: "w", month: "m", year: "y" };
  const window = options.recencyFilter;
  return window ? `${query} time:${codeFor[window]}` : query;
}
|
|
|
|
/**
 * Searches DuckDuckGo through its HTML endpoint and returns the parsed hits.
 *
 * The request is recorded with `activityMonitor`: a transport failure,
 * including one that happens while the body is being read, is logged as an
 * error; every received response is logged as complete with its status.
 * When the page yields no hit that passes the domain filter, the search is
 * retried against the lite endpoint.
 *
 * @param query - The user's search terms.
 * @param options - Optional settings. `numResults` defaults to 5 and is
 *   clamped to the range 0–20 (fractions are truncated, NaN falls back to 5).
 * @returns The hits plus a numbered text listing of them.
 * @throws Whatever `fetch` or the body read rejects with (e.g. on abort), or
 *   an `Error` reading `DuckDuckGo error <status>` for a non-OK response.
 */
export async function searchWithDuckDuckGo(
  query: string,
  options: SearchOptions = {},
): Promise<SearchResponse> {
  const activityId = activityMonitor.logStart({ type: "api", query });
  const params = new URLSearchParams({ q: buildQuery(query, options) });
  const url = `${DDG_HTML}?${params.toString()}`;

  let status: number;
  let html: string | undefined;
  try {
    const res = await fetch(url, {
      headers: {
        "user-agent": "Mozilla/5.0",
        "accept-language": "en-US,en;q=0.9",
      },
      signal: options.signal,
    });
    status = res.status;
    // Reading the body can still fail (abort, dropped connection); keeping it
    // inside the try ensures the activity is closed in that case too.
    if (res.ok) html = await res.text();
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    activityMonitor.logError(activityId, message);
    throw err;
  }

  activityMonitor.logComplete(activityId, status);
  if (html === undefined) {
    throw new Error(`DuckDuckGo error ${status}`);
  }

  const matches = applyDomainFilter(extractResultsFromHtml(html), options.domainFilter);
  if (matches.length === 0) {
    // Nothing usable on the HTML page: fall back to the lite endpoint.
    return searchWithDuckDuckGoLite(query, options);
  }

  // Clamp to 0..20 so a negative value cannot reach slice() as a
  // "count from the end" index and NaN cannot empty the list.
  const requested = options.numResults ?? 5;
  const limit = Number.isNaN(requested)
    ? 5
    : Math.max(0, Math.min(Math.trunc(requested), 20));
  const results = matches.slice(0, limit);

  const answer = results
    .map((r, idx) => `${idx + 1}. ${r.title}\n ${r.url}`)
    .join("\n\n");

  return { answer, results };
}
|
|
|
|
/**
 * Searches DuckDuckGo through its lite endpoint and returns the parsed hits.
 *
 * The request is recorded with `activityMonitor` under the query text
 * suffixed with ` (lite)`: a transport failure, including one that happens
 * while the body is being read, is logged as an error; every received
 * response is logged as complete with its status.
 *
 * @param query - The user's search terms.
 * @param options - Search settings. `numResults` defaults to 5 and is clamped
 *   to the range 0–20 (fractions are truncated, NaN falls back to 5).
 * @returns The hits plus a numbered text listing of them; both are empty when
 *   nothing was found or nothing passed the domain filter.
 * @throws Whatever `fetch` or the body read rejects with (e.g. on abort), or
 *   an `Error` reading `DuckDuckGo lite error <status>` for a non-OK response.
 */
async function searchWithDuckDuckGoLite(
  query: string,
  options: SearchOptions,
): Promise<SearchResponse> {
  const activityId = activityMonitor.logStart({ type: "api", query: `${query} (lite)` });
  const params = new URLSearchParams({ q: buildQuery(query, options) });
  const url = `${DDG_LITE}?${params.toString()}`;

  let status: number;
  let html: string | undefined;
  try {
    const res = await fetch(url, {
      headers: {
        "user-agent": "Mozilla/5.0",
        "accept-language": "en-US,en;q=0.9",
      },
      signal: options.signal,
    });
    status = res.status;
    // Reading the body can still fail (abort, dropped connection); keeping it
    // inside the try ensures the activity is closed in that case too.
    if (res.ok) html = await res.text();
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    activityMonitor.logError(activityId, message);
    throw err;
  }

  activityMonitor.logComplete(activityId, status);
  if (html === undefined) {
    throw new Error(`DuckDuckGo lite error ${status}`);
  }

  const matches = applyDomainFilter(extractResultsFromHtml(html), options.domainFilter);

  // Clamp to 0..20 so a negative value cannot reach slice() as a
  // "count from the end" index and NaN cannot empty the list.
  const requested = options.numResults ?? 5;
  const limit = Number.isNaN(requested)
    ? 5
    : Math.max(0, Math.min(Math.trunc(requested), 20));
  const results = matches.slice(0, limit);

  const answer = results
    .map((r, idx) => `${idx + 1}. ${r.title}\n ${r.url}`)
    .join("\n\n");

  return { answer, results };
}
|