extract.ts

 1// SPDX-FileCopyrightText: Amolith <amolith@secluded.site>
 2// SPDX-FileCopyrightText: Armin Ronacher <armin.ronacher@active-4.com>
 3//
 4// SPDX-License-Identifier: Apache-2.0
 5
 6import type { complete } from "@mariozechner/pi-ai";
 7import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
 8import type { ExtractedQuestion, ExtractionResult } from "./prompt.js";
 9
// Preferred model for extraction — lightweight is fine for structured JSON output.
// resolveExtractionModel() matches this value exactly against the `id` of each
// available model and falls back to the caller-supplied (session) model when
// no available model has this id.
const PREFERRED_EXTRACTION_MODEL_ID = "nemotron-3-super-120b-a12b";
13
/**
 * Pick the model used for question extraction.
 *
 * Scans the models the registry reports as available for the first one whose
 * `id` equals PREFERRED_EXTRACTION_MODEL_ID. When none matches, the
 * caller-supplied `fallback` is returned instead.
 *
 * @param ctx - Extension context whose `modelRegistry` is queried.
 * @param fallback - Model to use when the preferred one is not available.
 * @returns The preferred model if it is available, otherwise `fallback`.
 */
export function resolveExtractionModel(ctx: ExtensionContext, fallback: NonNullable<ExtensionContext["model"]>) {
	for (const candidate of ctx.modelRegistry.getAvailable()) {
		if (candidate.id === PREFERRED_EXTRACTION_MODEL_ID) {
			return candidate;
		}
	}
	return fallback;
}
23
/**
 * Parse a tool call response into an ExtractionResult.
 *
 * Finds the first `extract_questions` tool call in `response.content` and
 * validates its arguments defensively: the arguments are model output and may
 * not follow the declared schema.
 *
 * @param response - Completed model response to inspect.
 * @returns The extracted questions, or `null` when the model did not call
 *   `extract_questions`, when the call's arguments are not an object, or when
 *   `questions` is not an array. Malformed individual items are skipped rather
 *   than failing the whole result.
 */
export function parseToolCallResult(response: Awaited<ReturnType<typeof complete>>): ExtractionResult | null {
	const toolCall = response.content.find((c) => c.type === "toolCall" && c.name === "extract_questions");

	// The second check is redundant at runtime but narrows the union type.
	if (!toolCall || toolCall.type !== "toolCall") {
		console.error("Model did not call extract_questions:", response.content);
		return null;
	}

	// Treat the arguments as untrusted: a missing or primitive value would
	// otherwise throw on the property access below instead of returning null.
	const rawArgs: unknown = toolCall.arguments;
	if (typeof rawArgs !== "object" || rawArgs === null) {
		console.error("[answer] expected arguments object, got:", typeof rawArgs, rawArgs);
		return null;
	}

	const args = rawArgs as Record<string, unknown>;
	if (!Array.isArray(args.questions)) {
		console.error("[answer] expected questions array, got:", typeof args.questions, args);
		return null;
	}

	// Validate each question item — model may return plain strings instead
	// of objects if it ignores the schema, so we handle both gracefully.
	const questions: ExtractedQuestion[] = [];
	for (const item of args.questions) {
		if (typeof item === "string") {
			// Model returned bare string instead of object — use as question text
			questions.push({ question: item });
			continue;
		}
		// Numbers, booleans, null, etc. carry no usable question text.
		if (typeof item !== "object" || item === null) continue;
		const obj = item as Record<string, unknown>;
		if (typeof obj.question !== "string") {
			console.error("[answer] skipping item with no 'question' string:", Object.keys(obj));
			continue;
		}
		questions.push({
			question: obj.question,
			// Non-string context values are dropped rather than coerced.
			context: typeof obj.context === "string" ? obj.context : undefined,
		});
	}

	return { questions };
}
64
/**
 * Escape the characters that would break pseudo-XML tag boundaries.
 *
 * Replaces `&`, `<` and `>` with their entity forms in a single pass; all
 * other characters (including quotes) are returned unchanged.
 *
 * @param s - Text to embed between pseudo-XML tags.
 * @returns `s` with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
export function escapeXml(s: string): string {
	return s.replace(/[&<>]/g, (ch) => {
		switch (ch) {
			case "&":
				return "&amp;";
			case "<":
				return "&lt;";
			default:
				// The character class only admits ">" here.
				return "&gt;";
		}
	});
}