Re-add pi-web-access
This commit is contained in:
187
pi/files/agent/extensions/pi-web-access/ddg-search.ts
Normal file
187
pi/files/agent/extensions/pi-web-access/ddg-search.ts
Normal file
@@ -0,0 +1,187 @@
|
||||
import { activityMonitor } from "./activity.js";
|
||||
|
||||
/** One search hit parsed out of a DuckDuckGo results page. */
export interface SearchResult {
  /** Text of the result link, with markup removed. */
  title: string;
  /** Destination address of the result. */
  url: string;
  /** Short excerpt shown under the result; empty when none was captured. */
  snippet: string;
}
|
||||
|
||||
/** Outcome of a search: a text summary plus the structured hits behind it. */
export interface SearchResponse {
  /** Human-readable, numbered listing of the returned results. */
  answer: string;
  /** The individual hits, in the order they are listed in `answer`. */
  results: SearchResult[];
}
|
||||
|
||||
/** Optional knobs accepted by the DuckDuckGo search functions in this module. */
export interface SearchOptions {
  /** Maximum number of results to return. */
  numResults?: number;
  /** Restrict results to a recent time window. */
  recencyFilter?: "day" | "week" | "month" | "year";
  /**
   * Host names to keep; an entry prefixed with `-` names a host to drop.
   * Sub-domains of a listed host are matched as well.
   */
  domainFilter?: string[];
  /** Abort signal forwarded to the underlying `fetch` call. */
  signal?: AbortSignal;
}
|
||||
|
||||
/** Endpoint queried first by `searchWithDuckDuckGo`. */
const DDG_HTML = "https://duckduckgo.com/html/";

/** Endpoint queried by the "lite" fallback search. */
const DDG_LITE = "https://lite.duckduckgo.com/lite/";
|
||||
|
||||
/**
 * Keeps only the results whose host satisfies the given domain filter.
 *
 * Each filter entry is a host name. A plain entry is an "include": when at
 * least one include is present, a result must match one of them. An entry
 * prefixed with `-` is an "exclude": a result matching any exclude is dropped.
 * A host matches an entry when it equals it or is a sub-domain of it.
 * Matching is case-insensitive.
 *
 * Entries are trimmed, and blank entries (e.g. `""` or `"-"`) are ignored so
 * that a stray empty string does not silently discard every result.
 *
 * @param urls - Results to filter; the array is not modified.
 * @param domains - Include/exclude entries; when absent, empty, or made up
 *   solely of blank entries, `urls` is returned unchanged.
 * @returns The results that pass the filter. While a filter is active,
 *   results whose `url` cannot be parsed are dropped.
 */
function applyDomainFilter(urls: SearchResult[], domains?: string[]): SearchResult[] {
  if (!domains || domains.length === 0) return urls;

  const includes: string[] = [];
  const excludes: string[] = [];
  for (const entry of domains) {
    const trimmed = entry.trim();
    const isExclude = trimmed.startsWith("-");
    const domain = (isExclude ? trimmed.slice(1) : trimmed).trim().toLowerCase();
    // A blank include would match no host and therefore reject everything.
    if (!domain) continue;
    (isExclude ? excludes : includes).push(domain);
  }
  if (includes.length === 0 && excludes.length === 0) return urls;

  const hostMatches = (host: string, domain: string): boolean =>
    host === domain || host.endsWith(`.${domain}`);

  return urls.filter((result) => {
    let host: string;
    try {
      host = new URL(result.url).hostname.toLowerCase();
    } catch {
      // Without a parsable host the filter cannot be evaluated.
      return false;
    }
    if (includes.length > 0 && !includes.some((d) => hostMatches(host, d))) {
      return false;
    }
    return !excludes.some((d) => hostMatches(host, d));
  });
}
|
||||
|
||||
/**
 * Scrapes search hits out of a DuckDuckGo results page.
 *
 * Two markup layouts are recognised. The first pairs a `result__a` link with
 * a following `result__snippet` element. Only when that yields nothing is the
 * second layout tried: a link followed by `link-text` and `result-snippet`
 * spans. Hits with an empty URL or empty title are skipped.
 *
 * @param html - Raw page markup.
 * @returns The hits in document order; empty when neither layout matches.
 */
function extractResultsFromHtml(html: string): SearchResult[] {
  const standardLayout = /<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/a>|<div[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/div>)/g;
  const liteLayout = /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>\s*<br\s*\/?>\s*<span[^>]*class="link-text"[^>]*>([\s\S]*?)<\/span>\s*<br\s*\/?>\s*<span[^>]*class="result-snippet"[^>]*>([\s\S]*?)<\/span>/g;

  // Runs one pattern over the page; `snippetGroups` lists the capture groups
  // that may hold the snippet, in order of preference.
  const collect = (pattern: RegExp, snippetGroups: number[]): SearchResult[] => {
    const hits: SearchResult[] = [];
    for (const found of html.matchAll(pattern)) {
      const url = decodeUrl(found[1]);
      const title = stripTags(found[2]);
      const rawSnippet = snippetGroups.map((group) => found[group]).find(Boolean) ?? "";
      const snippet = stripTags(rawSnippet);
      if (url && title) {
        hits.push({ title, url, snippet });
      }
    }
    return hits;
  };

  const standardHits = collect(standardLayout, [3, 4]);
  return standardHits.length > 0 ? standardHits : collect(liteLayout, [4]);
}
|
||||
|
||||
/** Named character references decoded by `stripTags`. */
const HTML_NAMED_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/**
 * Converts an HTML fragment to plain text.
 *
 * Tags are removed first, then character references (`&amp;`, `&#39;`,
 * `&#x27;`, ...) are decoded so titles and snippets read as text rather than
 * markup. Finally, runs of whitespace collapse to a single space and the
 * result is trimmed.
 *
 * Decoding is a single pass, so `&amp;lt;` becomes the literal text `&lt;`
 * rather than `<`. Unknown named references and out-of-range numeric
 * references are left as written.
 *
 * @param text - HTML fragment.
 * @returns The plain-text content.
 */
function stripTags(text: string): string {
  return text
    .replace(/<[^>]+>/g, "")
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (entity: string, body: string) => {
      if (body.startsWith("#")) {
        const isHex = body[1] === "x" || body[1] === "X";
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws outside the Unicode range.
        return codePoint > 0 && codePoint <= 0x10ffff
          ? String.fromCodePoint(codePoint)
          : entity;
      }
      return HTML_NAMED_ENTITIES.get(body.toLowerCase()) ?? entity;
    })
    .replace(/\s+/g, " ")
    .trim();
}
|
||||
|
||||
/**
 * Resolves an `href` taken from a results page to the result's real address.
 *
 * The value is resolved against `https://duckduckgo.com`. When it carries a
 * `uddg` query parameter, that parameter's value is returned as the
 * destination. Otherwise the resolved absolute URL is returned.
 *
 * `URLSearchParams.get` already percent-decodes the value, so it is returned
 * as-is. Decoding it a second time would corrupt destinations that contain
 * escaped characters (`%2520` would become a space) and throw on a literal `%`.
 *
 * The href comes from raw markup, so `&amp;` separators are unescaped before
 * parsing.
 *
 * @param url - Raw `href` attribute value, absolute or relative.
 * @returns The destination URL, or the input unchanged if it cannot be parsed.
 */
function decodeUrl(url: string): string {
  try {
    const parsed = new URL(url.replace(/&amp;/g, "&"), "https://duckduckgo.com");
    const target = parsed.searchParams.get("uddg");
    return target ? target : parsed.toString();
  } catch {
    return url;
  }
}
|
||||
|
||||
/**
 * Produces the text sent as the `q` search parameter.
 *
 * Without a recency filter the query is returned untouched. With one, a
 * ` time:<code>` suffix is appended, where the code is `d`, `w`, `m` or `y`.
 *
 * NOTE(review): confirm that DuckDuckGo honours a `time:` term inside the
 * query text; this function only builds the string.
 *
 * @param query - The user's search terms.
 * @param options - Search options; only `recencyFilter` is read here.
 * @returns The query text to submit.
 */
function buildQuery(query: string, options: SearchOptions): string {
  const { recencyFilter } = options;
  if (!recencyFilter) return query;

  const recencyCodes: Record<string, string> = {
    day: "d",
    week: "w",
    month: "m",
    year: "y",
  };
  return `${query} time:${recencyCodes[recencyFilter]}`;
}
|
||||
|
||||
/** Number of results returned when the caller does not set `numResults`. */
const DEFAULT_RESULT_COUNT = 5;

/** Hard upper bound on the number of results returned by one search. */
const MAX_RESULT_COUNT = 20;

/**
 * Turns the caller's `numResults` into a safe slice length.
 *
 * Missing or NaN values fall back to the default. Everything else is
 * truncated to an integer and clamped to `[0, MAX_RESULT_COUNT]`. Without the
 * lower bound, a negative value would reach `Array.prototype.slice` as a
 * negative end index and return "all but the last k" results.
 */
function resolveResultLimit(requested: number | undefined): number {
  const wanted = requested ?? DEFAULT_RESULT_COUNT;
  if (Number.isNaN(wanted)) return DEFAULT_RESULT_COUNT;
  return Math.max(0, Math.min(Math.trunc(wanted), MAX_RESULT_COUNT));
}

/** Renders results as a numbered list of titles, each followed by its URL. */
function formatAnswer(results: SearchResult[]): string {
  return results
    .map((r, idx) => `${idx + 1}. ${r.title}\n ${r.url}`)
    .join("\n\n");
}

/**
 * Runs one search request against `endpoint` and parses the page it returns.
 *
 * The request is recorded with the activity monitor. A network failure, or a
 * failure while reading the response body, is reported through `logError` and
 * rethrown. Otherwise the HTTP status is reported through `logComplete`.
 *
 * @param endpoint - Base URL of the search page to query.
 * @param activityLabel - Query text recorded with the activity monitor.
 * @param errorPrefix - Prefix of the error thrown on a non-2xx status.
 * @param query - The user's search terms.
 * @param options - Search options.
 * @returns The filtered and limited results with their text summary.
 * @throws Error `"<errorPrefix> <status>"` when the response is not OK, or
 *   whatever `fetch` / `Response.text` rejected with.
 */
async function fetchSearchResults(
  endpoint: string,
  activityLabel: string,
  errorPrefix: string,
  query: string,
  options: SearchOptions,
): Promise<SearchResponse> {
  const activityId = activityMonitor.logStart({ type: "api", query: activityLabel });
  const params = new URLSearchParams({ q: buildQuery(query, options) });
  const url = `${endpoint}?${params.toString()}`;

  let res: Response;
  let html = "";
  try {
    res = await fetch(url, {
      headers: {
        "user-agent": "Mozilla/5.0",
        "accept-language": "en-US,en;q=0.9",
      },
      signal: options.signal,
    });
    // Read the body inside the try so an aborted or failed download is
    // reported to the activity monitor as well.
    if (res.ok) {
      html = await res.text();
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    activityMonitor.logError(activityId, message);
    throw err;
  }

  activityMonitor.logComplete(activityId, res.status);
  if (!res.ok) {
    throw new Error(`${errorPrefix} ${res.status}`);
  }

  const limit = resolveResultLimit(options.numResults);
  const results = applyDomainFilter(extractResultsFromHtml(html), options.domainFilter).slice(
    0,
    limit,
  );
  return { answer: formatAnswer(results), results };
}

/**
 * Searches DuckDuckGo and returns the top results.
 *
 * The HTML endpoint is queried first. If that yields no results once the
 * domain filter and result limit are applied, the lite endpoint is queried
 * and its response is returned instead.
 *
 * @param query - The user's search terms.
 * @param options - Result count (default 5, at most 20), recency filter,
 *   domain filter, and abort signal.
 * @returns A numbered text summary and the structured results.
 * @throws Error when an endpoint responds with a non-2xx status, or when the
 *   request itself fails or is aborted.
 */
export async function searchWithDuckDuckGo(
  query: string,
  options: SearchOptions = {},
): Promise<SearchResponse> {
  const primary = await fetchSearchResults(DDG_HTML, query, "DuckDuckGo error", query, options);
  if (primary.results.length > 0) {
    return primary;
  }
  // fallback to lite
  return searchWithDuckDuckGoLite(query, options);
}

/**
 * Searches the DuckDuckGo lite endpoint.
 *
 * Behaves like the primary search but has no further fallback: an empty
 * result list is returned as-is. The activity entry is labelled with a
 * ` (lite)` suffix.
 *
 * @param query - The user's search terms.
 * @param options - Same options as `searchWithDuckDuckGo`.
 * @returns A numbered text summary and the structured results.
 * @throws Error `"DuckDuckGo lite error <status>"` on a non-2xx status, or
 *   the underlying request failure.
 */
async function searchWithDuckDuckGoLite(
  query: string,
  options: SearchOptions,
): Promise<SearchResponse> {
  return fetchSearchResults(
    DDG_LITE,
    `${query} (lite)`,
    "DuckDuckGo lite error",
    query,
    options,
  );
}
|
||||
Reference in New Issue
Block a user