233 lines
7.1 KiB
TypeScript
233 lines
7.1 KiB
TypeScript
import { existsSync, readFileSync } from "node:fs";
|
|
import { homedir } from "node:os";
|
|
import { join } from "node:path";
|
|
import { activityMonitor } from "./activity.js";
|
|
import { getApiKey, API_BASE, DEFAULT_MODEL } from "./gemini-api.js";
|
|
import { isGeminiWebAvailable, queryWithCookies } from "./gemini-web.js";
|
|
import { isPerplexityAvailable, searchWithPerplexity, type SearchResult, type SearchResponse, type SearchOptions } from "./perplexity.js";
|
|
import { searchWithDuckDuckGo } from "./ddg-search.js";
|
|
|
|
export type SearchProvider = "auto" | "perplexity" | "gemini" | "duckduckgo";
|
|
|
|
const CONFIG_PATH = join(homedir(), ".pi", "web-search.json");
|
|
|
|
let cachedSearchConfig: { searchProvider: SearchProvider } | null = null;
|
|
|
|
function getSearchConfig(): { searchProvider: SearchProvider } {
|
|
if (cachedSearchConfig) return cachedSearchConfig;
|
|
try {
|
|
if (existsSync(CONFIG_PATH)) {
|
|
const raw = JSON.parse(readFileSync(CONFIG_PATH, "utf-8"));
|
|
cachedSearchConfig = { searchProvider: raw.searchProvider ?? "auto" };
|
|
return cachedSearchConfig;
|
|
}
|
|
} catch {}
|
|
cachedSearchConfig = { searchProvider: "auto" };
|
|
return cachedSearchConfig;
|
|
}
|
|
|
|
export interface FullSearchOptions extends SearchOptions {
|
|
provider?: SearchProvider;
|
|
}
|
|
|
|
export async function search(query: string, options: FullSearchOptions = {}): Promise<SearchResponse> {
|
|
const config = getSearchConfig();
|
|
const provider = options.provider ?? config.searchProvider;
|
|
|
|
if (provider === "perplexity") {
|
|
return searchWithPerplexity(query, options);
|
|
}
|
|
|
|
if (provider === "duckduckgo") {
|
|
return searchWithDuckDuckGo(query, options);
|
|
}
|
|
|
|
if (provider === "gemini") {
|
|
const result = await searchWithGeminiApi(query, options)
|
|
?? await searchWithGeminiWeb(query, options);
|
|
if (result) return result;
|
|
return searchWithDuckDuckGo(query, options);
|
|
}
|
|
|
|
if (isPerplexityAvailable()) {
|
|
return searchWithPerplexity(query, options);
|
|
}
|
|
|
|
const geminiResult = await searchWithGeminiApi(query, options)
|
|
?? await searchWithGeminiWeb(query, options);
|
|
if (geminiResult) return geminiResult;
|
|
|
|
return searchWithDuckDuckGo(query, options);
|
|
}
|
|
|
|
async function searchWithGeminiApi(query: string, options: SearchOptions = {}): Promise<SearchResponse | null> {
|
|
const apiKey = getApiKey();
|
|
if (!apiKey) return null;
|
|
|
|
const activityId = activityMonitor.logStart({ type: "api", query });
|
|
|
|
try {
|
|
const model = DEFAULT_MODEL;
|
|
const body = {
|
|
contents: [{ parts: [{ text: query }] }],
|
|
tools: [{ google_search: {} }],
|
|
};
|
|
|
|
const res = await fetch(`${API_BASE}/models/${model}:generateContent?key=${apiKey}`, {
|
|
method: "POST",
|
|
headers: { "Content-Type": "application/json" },
|
|
body: JSON.stringify(body),
|
|
signal: AbortSignal.any([
|
|
AbortSignal.timeout(60000),
|
|
...(options.signal ? [options.signal] : []),
|
|
]),
|
|
});
|
|
|
|
if (!res.ok) {
|
|
const errorText = await res.text();
|
|
throw new Error(`Gemini API error ${res.status}: ${errorText.slice(0, 300)}`);
|
|
}
|
|
|
|
const data = await res.json() as GeminiSearchResponse;
|
|
activityMonitor.logComplete(activityId, res.status);
|
|
|
|
const answer = data.candidates?.[0]?.content?.parts
|
|
?.map(p => p.text).filter(Boolean).join("\n") ?? "";
|
|
|
|
const metadata = data.candidates?.[0]?.groundingMetadata;
|
|
const results = await resolveGroundingChunks(metadata?.groundingChunks, options.signal);
|
|
|
|
if (!answer && results.length === 0) return null;
|
|
return { answer, results };
|
|
} catch (err) {
|
|
const message = err instanceof Error ? err.message : String(err);
|
|
if (message.toLowerCase().includes("abort")) {
|
|
activityMonitor.logComplete(activityId, 0);
|
|
} else {
|
|
activityMonitor.logError(activityId, message);
|
|
}
|
|
return null;
|
|
}
|
|
}
|
|
|
|
async function searchWithGeminiWeb(query: string, options: SearchOptions = {}): Promise<SearchResponse | null> {
|
|
const cookies = await isGeminiWebAvailable();
|
|
if (!cookies) return null;
|
|
|
|
const prompt = buildSearchPrompt(query, options);
|
|
const activityId = activityMonitor.logStart({ type: "api", query });
|
|
|
|
try {
|
|
const text = await queryWithCookies(prompt, cookies, {
|
|
model: "gemini-3-flash-preview",
|
|
signal: options.signal,
|
|
timeoutMs: 60000,
|
|
});
|
|
|
|
activityMonitor.logComplete(activityId, 200);
|
|
|
|
const results = extractSourceUrls(text);
|
|
return { answer: text, results };
|
|
} catch (err) {
|
|
const message = err instanceof Error ? err.message : String(err);
|
|
if (message.toLowerCase().includes("abort")) {
|
|
activityMonitor.logComplete(activityId, 0);
|
|
} else {
|
|
activityMonitor.logError(activityId, message);
|
|
}
|
|
return null;
|
|
}
|
|
}
|
|
|
|
function buildSearchPrompt(query: string, options: SearchOptions): string {
|
|
let prompt = `Search the web and answer the following question. Include source URLs for your claims.\nFormat your response as:\n1. A direct answer to the question\n2. Cited sources as markdown links\n\nQuestion: ${query}`;
|
|
|
|
if (options.recencyFilter) {
|
|
const labels: Record<string, string> = {
|
|
day: "past 24 hours",
|
|
week: "past week",
|
|
month: "past month",
|
|
year: "past year",
|
|
};
|
|
prompt += `\n\nOnly include results from the ${labels[options.recencyFilter]}.`;
|
|
}
|
|
|
|
if (options.domainFilter?.length) {
|
|
const includes = options.domainFilter.filter(d => !d.startsWith("-"));
|
|
const excludes = options.domainFilter.filter(d => d.startsWith("-")).map(d => d.slice(1));
|
|
if (includes.length) prompt += `\n\nOnly cite sources from: ${includes.join(", ")}`;
|
|
if (excludes.length) prompt += `\n\nDo not cite sources from: ${excludes.join(", ")}`;
|
|
}
|
|
|
|
return prompt;
|
|
}
|
|
|
|
function extractSourceUrls(markdown: string): SearchResult[] {
|
|
const results: SearchResult[] = [];
|
|
const seen = new Set<string>();
|
|
const linkRegex = /\[([^\]]+)\]\((https?:\/\/[^)]+)\)/g;
|
|
for (const match of markdown.matchAll(linkRegex)) {
|
|
const url = match[2];
|
|
if (seen.has(url)) continue;
|
|
seen.add(url);
|
|
results.push({ title: match[1], url, snippet: "" });
|
|
}
|
|
return results;
|
|
}
|
|
|
|
async function resolveGroundingChunks(
|
|
chunks: GroundingChunk[] | undefined,
|
|
signal?: AbortSignal,
|
|
): Promise<SearchResult[]> {
|
|
if (!chunks?.length) return [];
|
|
|
|
const results: SearchResult[] = [];
|
|
for (const chunk of chunks) {
|
|
if (!chunk.web) continue;
|
|
const title = chunk.web.title || "";
|
|
let url = chunk.web.uri || "";
|
|
|
|
if (url.includes("vertexaisearch.cloud.google.com/grounding-api-redirect")) {
|
|
const resolved = await resolveRedirect(url, signal);
|
|
if (resolved) url = resolved;
|
|
}
|
|
|
|
if (url) results.push({ title, url, snippet: "" });
|
|
}
|
|
return results;
|
|
}
|
|
|
|
async function resolveRedirect(proxyUrl: string, signal?: AbortSignal): Promise<string | null> {
|
|
try {
|
|
const res = await fetch(proxyUrl, {
|
|
method: "HEAD",
|
|
redirect: "manual",
|
|
signal: AbortSignal.any([
|
|
AbortSignal.timeout(5000),
|
|
...(signal ? [signal] : []),
|
|
]),
|
|
});
|
|
return res.headers.get("location") || null;
|
|
} catch {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
interface GeminiSearchResponse {
|
|
candidates?: Array<{
|
|
content?: { parts?: Array<{ text?: string }> };
|
|
groundingMetadata?: {
|
|
webSearchQueries?: string[];
|
|
groundingChunks?: GroundingChunk[];
|
|
groundingSupports?: Array<{
|
|
segment?: { startIndex?: number; endIndex?: number; text?: string };
|
|
groundingChunkIndices?: number[];
|
|
}>;
|
|
};
|
|
}>;
|
|
}
|
|
|
|
interface GroundingChunk {
|
|
web?: { uri?: string; title?: string };
|
|
}
|