Re-add pi-web-access

This commit is contained in:
2026-02-19 22:23:48 +00:00
parent c242a0ca53
commit 774492f279
31 changed files with 8666 additions and 1 deletions

View File

@@ -0,0 +1,187 @@
import { activityMonitor } from "./activity.js";
/** One search hit extracted from a DuckDuckGo results page. */
export interface SearchResult {
/** Link text of the result with HTML tags removed and whitespace collapsed. */
title: string;
/** Destination URL; DuckDuckGo redirect links are unwrapped via their `uddg` parameter. */
url: string;
/** Summary text shown under the result; an empty string when none was captured. */
snippet: string;
}
/** Outcome of a search: a ready-to-print summary plus the structured hits behind it. */
export interface SearchResponse {
/** Numbered plain-text list of the returned results' titles and URLs (empty when there are no results). */
answer: string;
/** The hits that survived domain filtering and truncation to the requested count. */
results: SearchResult[];
}
/** Optional knobs accepted by the DuckDuckGo search functions in this module. */
export interface SearchOptions {
/** Maximum number of results to return; defaults to 5 and is capped at 20. */
numResults?: number;
/** Restricts results by age; mapped to d/w/m/y and appended to the query text as `time:<code>`. */
recencyFilter?: "day" | "week" | "month" | "year";
/**
 * Host names to keep or drop. A plain entry keeps only results on that host
 * or its subdomains; an entry prefixed with "-" drops them.
 */
domainFilter?: string[];
/** Forwarded to `fetch` so the caller can abort the request. */
signal?: AbortSignal;
}
// HTML results endpoint; this is the one searchWithDuckDuckGo queries first.
const DDG_HTML = "https://duckduckgo.com/html/";
// Lite results endpoint; used as the fallback when the HTML endpoint yields no usable results.
const DDG_LITE = "https://lite.duckduckgo.com/lite/";
/**
 * Keeps only the results whose host passes the caller's domain rules.
 *
 * Each entry in `domains` is either an allow rule (`example.com`) or, when it
 * starts with "-", a block rule (`-example.com`). A rule matches a host when
 * the host equals the domain or is one of its subdomains; comparison is
 * case-insensitive. When at least one allow rule exists, a result must match
 * one of them. Results whose `url` cannot be parsed are dropped.
 *
 * @param urls - Results to filter.
 * @param domains - Allow/block rules; when missing or empty, `urls` is returned as-is.
 * @returns The same array when no rules are given, otherwise a new filtered array.
 */
function applyDomainFilter(urls: SearchResult[], domains?: string[]): SearchResult[] {
  if (!domains?.length) return urls;

  // Split the rules in a single pass, normalising case as we go.
  const allowed: string[] = [];
  const blocked: string[] = [];
  for (const rule of domains) {
    if (rule.startsWith("-")) {
      blocked.push(rule.slice(1).toLowerCase());
    } else {
      allowed.push(rule.toLowerCase());
    }
  }

  // Exact host match or any subdomain ("docs.example.com" matches "example.com").
  const hostMatches = (host: string, rules: string[]): boolean =>
    rules.some((domain) => host === domain || host.endsWith(`.${domain}`));

  return urls.filter((result) => {
    let host: string;
    try {
      host = new URL(result.url).hostname.toLowerCase();
    } catch {
      // Unparseable URLs cannot be checked against the rules, so they are excluded.
      return false;
    }
    const passesAllowList = allowed.length === 0 || hostMatches(host, allowed);
    return passesAllowList && !hostMatches(host, blocked);
  });
}
/**
 * Scrapes search hits out of a DuckDuckGo results page.
 *
 * Two markup patterns are tried in order: the `result__a` / `result__snippet`
 * pattern first, and, only if that produced nothing, the `link-text` /
 * `result-snippet` pattern. Matches whose URL or title comes out empty are
 * skipped.
 *
 * @param html - Raw HTML of a results page.
 * @returns The extracted hits in document order; empty when neither pattern matches.
 */
function extractResultsFromHtml(html: string): SearchResult[] {
  const htmlPattern = /<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/a>|<div[^>]+class="result__snippet"[^>]*>([\s\S]*?)<\/div>)/g;
  const litePattern = /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>\s*<br\s*\/?>\s*<span[^>]*class="link-text"[^>]*>([\s\S]*?)<\/span>\s*<br\s*\/?>\s*<span[^>]*class="result-snippet"[^>]*>([\s\S]*?)<\/span>/g;

  // Shared walker: group 1 is always the href and group 2 the link text; the
  // snippet lives in a pattern-specific group, so the caller supplies a picker.
  const collect = (
    pattern: RegExp,
    pickSnippet: (groups: RegExpMatchArray) => string,
  ): SearchResult[] => {
    const found: SearchResult[] = [];
    for (const groups of html.matchAll(pattern)) {
      const url = decodeUrl(groups[1]);
      const title = stripTags(groups[2]);
      const snippet = stripTags(pickSnippet(groups));
      if (url && title) {
        found.push({ title, url, snippet });
      }
    }
    return found;
  };

  // The snippet is wrapped in either an <a> (group 3) or a <div> (group 4).
  const primary = collect(htmlPattern, (groups) => groups[3] || groups[4] || "");
  if (primary.length > 0) return primary;

  return collect(litePattern, (groups) => groups[4] || "");
}
/** Named character references decoded by `stripTags`; anything else is left untouched. */
const NAMED_HTML_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Reduces an HTML fragment to plain text.
 *
 * Tags are removed first, then character references (`&amp;`, `&#39;`,
 * `&#x27;`, ...) are decoded, and finally runs of whitespace are collapsed to
 * a single space and the result is trimmed. Decoding happens after tag
 * removal so that escaped markup such as `&lt;b&gt;` survives as literal
 * text, and in a single pass so that `&amp;lt;` becomes `&lt;` rather than `<`.
 *
 * @param text - HTML fragment, e.g. the inner HTML of a result link.
 * @returns The readable text content.
 */
function stripTags(text: string): string {
  return text
    .replace(/<[^>]+>/g, "")
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (entity: string, body: string) => {
      const name = body.toLowerCase();
      if (name.startsWith("#")) {
        const code = name.startsWith("#x")
          ? Number.parseInt(name.slice(2), 16)
          : Number.parseInt(name.slice(1), 10);
        // Leave out-of-range references as written rather than throwing.
        return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : entity;
      }
      return NAMED_HTML_ENTITIES.get(name) ?? entity;
    })
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Turns an `href` scraped from a DuckDuckGo results page into the real
 * destination URL.
 *
 * - The value comes straight out of an HTML attribute, so `&amp;` separators
 *   are unescaped before parsing.
 * - Relative and protocol-relative links are resolved against
 *   `https://duckduckgo.com`.
 * - Redirect links carry the destination in the `uddg` query parameter; when
 *   present, that value is returned. `URLSearchParams.get` already
 *   percent-decodes it, so it must not be decoded a second time: doing so
 *   mangles destinations that legitimately contain `%20`/`%25` and throws on a
 *   bare `%`.
 *
 * @param url - Raw `href` attribute value.
 * @returns The destination URL, or the unescaped input when it cannot be parsed.
 */
function decodeUrl(url: string): string {
  const href = url.replace(/&amp;/gi, "&");
  try {
    const parsed = new URL(href, "https://duckduckgo.com");
    const target = parsed.searchParams.get("uddg");
    return target ? target : parsed.toString();
  } catch {
    return href;
  }
}
/**
 * Builds the text sent as the `q` parameter.
 *
 * When `options.recencyFilter` is one of the known values, ` time:<code>` is
 * appended, where the code is `d`, `w`, `m` or `y`. Any other value (possible
 * when options come from untyped input) is ignored instead of producing
 * `time:undefined`.
 *
 * NOTE(review): `time:` does not appear to be a documented DuckDuckGo query
 * operator; date filtering is believed to be exposed as the `df` URL
 * parameter instead. Confirm, and if so move the filter out of the query text.
 *
 * @param query - The user's search terms.
 * @param options - Search options; only `recencyFilter` is read here.
 * @returns The query text, with the recency suffix when applicable.
 */
function buildQuery(query: string, options: SearchOptions): string {
  // A Map (not an object literal) so that inherited keys such as
  // "constructor" can never be mistaken for a recency code.
  const recencyCodes = new Map<string, string>([
    ["day", "d"],
    ["week", "w"],
    ["month", "m"],
    ["year", "y"],
  ]);
  const code =
    options.recencyFilter !== undefined ? recencyCodes.get(options.recencyFilter) : undefined;
  return code === undefined ? query : `${query} time:${code}`;
}
/** Number of results returned when the caller does not pass `numResults`. */
const DEFAULT_NUM_RESULTS = 5;
/** Hard upper bound on the number of results returned per search. */
const MAX_NUM_RESULTS = 20;

/** Request headers sent to both DuckDuckGo endpoints. */
const DDG_REQUEST_HEADERS = {
  "user-agent": "Mozilla/5.0",
  "accept-language": "en-US,en;q=0.9",
};

/** Everything that differs between the two DuckDuckGo endpoints. */
interface DuckDuckGoEndpoint {
  /** Base URL the query string is appended to. */
  url: string;
  /** Text appended to the query in the activity log to tell the endpoints apart. */
  activitySuffix: string;
  /** Prefix of the error thrown on a non-2xx response. */
  errorLabel: string;
}

const DDG_HTML_ENDPOINT: DuckDuckGoEndpoint = {
  url: DDG_HTML,
  activitySuffix: "",
  errorLabel: "DuckDuckGo error",
};

const DDG_LITE_ENDPOINT: DuckDuckGoEndpoint = {
  url: DDG_LITE,
  activitySuffix: " (lite)",
  errorLabel: "DuckDuckGo lite error",
};

/**
 * Converts the caller's `numResults` into a safe upper bound for `slice`.
 *
 * Missing or NaN falls back to the default; the value is truncated to an
 * integer and clamped to `[0, MAX_NUM_RESULTS]`. Clamping at zero matters:
 * a negative bound would make `slice(0, -n)` return "all but the last n".
 */
function resolveResultLimit(numResults: number | undefined): number {
  const requested = numResults ?? DEFAULT_NUM_RESULTS;
  if (Number.isNaN(requested)) return DEFAULT_NUM_RESULTS;
  return Math.min(Math.max(Math.trunc(requested), 0), MAX_NUM_RESULTS);
}

/**
 * Runs one request against a DuckDuckGo endpoint and returns the parsed,
 * domain-filtered (but not yet truncated) results.
 *
 * The request is recorded with `activityMonitor`: `logError` when the fetch
 * or the body read throws, `logComplete` with the HTTP status otherwise.
 *
 * @throws The underlying error when the fetch or body read fails (including
 *   aborts via `options.signal`), or an `Error` of the form
 *   `<errorLabel> <status>` on a non-2xx response.
 */
async function fetchDuckDuckGoResults(
  endpoint: DuckDuckGoEndpoint,
  query: string,
  options: SearchOptions,
): Promise<SearchResponse["results"]> {
  const activityId = activityMonitor.logStart({
    type: "api",
    query: `${query}${endpoint.activitySuffix}`,
  });
  const params = new URLSearchParams({ q: buildQuery(query, options) });

  let res: Response;
  let html = "";
  try {
    res = await fetch(`${endpoint.url}?${params.toString()}`, {
      headers: DDG_REQUEST_HEADERS,
      signal: options.signal,
    });
    // Read the body inside the try so that a failure mid-download (e.g. an
    // abort) is logged as an error instead of leaving the activity open.
    if (res.ok) html = await res.text();
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    activityMonitor.logError(activityId, message);
    throw err;
  }

  activityMonitor.logComplete(activityId, res.status);
  if (!res.ok) {
    throw new Error(`${endpoint.errorLabel} ${res.status}`);
  }
  return applyDomainFilter(extractResultsFromHtml(html), options.domainFilter);
}

/** Packages results together with a numbered "title / URL" text summary. */
function toSearchResponse(results: SearchResponse["results"]): SearchResponse {
  const answer = results
    .map((r, idx) => `${idx + 1}. ${r.title}\n ${r.url}`)
    .join("\n\n");
  return { answer, results };
}

/**
 * Searches DuckDuckGo by scraping its HTML results page.
 *
 * If the HTML endpoint yields no results that pass the domain filter, the
 * lite endpoint is queried as a fallback. The fallback is decided before
 * truncation, so asking for zero results does not trigger a second request.
 *
 * @param query - Search terms.
 * @param options - Result count (default 5, max 20), recency and domain
 *   filters, and an optional abort signal.
 * @returns The matching results and a numbered text summary of them.
 * @throws When a request fails, is aborted, or returns a non-2xx status.
 */
export async function searchWithDuckDuckGo(
  query: string,
  options: SearchOptions = {},
): Promise<SearchResponse> {
  const matches = await fetchDuckDuckGoResults(DDG_HTML_ENDPOINT, query, options);
  if (matches.length === 0) {
    // fallback to lite
    return searchWithDuckDuckGoLite(query, options);
  }
  return toSearchResponse(matches.slice(0, resolveResultLimit(options.numResults)));
}

/**
 * Same search as `searchWithDuckDuckGo`, but against the lite endpoint and
 * without any further fallback: an empty result set is returned as-is.
 *
 * @throws When the request fails, is aborted, or returns a non-2xx status.
 */
async function searchWithDuckDuckGoLite(
  query: string,
  options: SearchOptions,
): Promise<SearchResponse> {
  const matches = await fetchDuckDuckGoResults(DDG_LITE_ENDPOINT, query, options);
  return toSearchResponse(matches.slice(0, resolveResultLimit(options.numResults)));
}