Skip to content
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ During normal plan review, an Archive sidebar tab provides the same browsing via
| `/api/external-annotations` | POST | Add external annotations (single or batch `{ annotations: [...] }`) |
| `/api/external-annotations` | PATCH | Update fields on a single annotation (`?id=`) |
| `/api/external-annotations` | DELETE | Remove by `?id=`, `?source=`, or clear all |
| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour) |
| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour, cursor) |
| `/api/agents/jobs/stream` | GET | SSE stream for real-time agent job status updates |
| `/api/agents/jobs` | GET | Snapshot of agent jobs (polling fallback, `?since=N` for version gating) |
| `/api/agents/jobs` | POST | Launch an agent job (body: `{ provider, command, label }`) |
Expand Down
117 changes: 94 additions & 23 deletions apps/pi-extension/server/agent-jobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
AGENT_HEARTBEAT_INTERVAL_MS,
} from "../generated/agent-jobs.js";
import { formatClaudeLogEvent } from "../generated/claude-review.js";
import { formatCursorLogEvent, parseCursorModelsOutput, type CursorModel } from "../generated/cursor-review.js";
import { json, parseBody } from "./helpers.js";

// ---------------------------------------------------------------------------
Expand All @@ -32,6 +33,15 @@ const JOBS = `${BASE}/jobs`;
const JOBS_STREAM = `${JOBS}/stream`;
const CAPABILITIES = `${BASE}/capabilities`;

// Provider ids whose spawn command must come from the server-side builder.
// Argv sent by the client is never executed for these ids: either
// buildCommand yields the command, or the launch is rejected.
const SERVER_BUILT_PROVIDERS: ReadonlySet<string> = new Set<string>(
  ["claude", "codex", "tour", "cursor"],
);

// ---------------------------------------------------------------------------
// which() helper for Node.js
// ---------------------------------------------------------------------------
Expand All @@ -54,7 +64,7 @@ export interface AgentJobHandlerOptions {
mode: "plan" | "review" | "annotate";
getServerUrl: () => string;
getCwd: () => string;
/** Server-side command builder for known providers (codex, claude, tour). */
/** Server-side command builder for known providers (codex, claude, tour, cursor). */
buildCommand?: (provider: string, config?: Record<string, unknown>) => Promise<{
command: string[];
outputPath?: string;
Expand Down Expand Up @@ -84,6 +94,24 @@ export interface AgentJobHandlerOptions {
onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
}

/**
 * Queries the Cursor CLI (`agent models`) for its model catalog and parses the
 * captured stdout with `parseCursorModelsOutput`.
 *
 * Best-effort: the call is synchronous with a 5 second timeout, stdin and
 * stderr are ignored, and any failure (spawn error, non-zero exit, timeout, or
 * a parser throw) results in an empty list. Per the original note, the UI then
 * falls back to an `auto`-only picker; the catalog is account-specific, so it
 * is never hardcoded.
 *
 * @returns The parsed models, or `[]` when discovery fails.
 */
function discoverCursorModels(): CursorModel[] {
  let models: CursorModel[] = [];
  try {
    const rawListing = execFileSync("agent", ["models"], {
      encoding: "utf8",
      // Only stdout is captured; stdin and stderr are discarded.
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 5000,
    });
    models = parseCursorModelsOutput(rawListing);
  } catch {
    // Discovery is optional — keep the empty default.
  }
  return models;
}

export function createAgentJobHandler(options: AgentJobHandlerOptions) {
const { mode, getServerUrl, getCwd } = options;

Expand All @@ -94,10 +122,19 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
let version = 0;

// --- Capability detection (run once) ---
// Cursor CLI's binary is literally named `agent` (NOT `cursor`). When present,
// discover its account-specific model catalog so the UI doesn't hardcode ids.
const cursorAvailable = mode === "review" && whichCmd("agent");
const capabilities: AgentCapability[] = [
{ id: "claude", name: "Claude Code", available: whichCmd("claude") },
{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
{ id: "tour", name: "Code Tour", available: whichCmd("claude") || whichCmd("codex") },
{
id: "cursor",
name: "Cursor CLI",
available: cursorAvailable,
...(cursorAvailable ? { models: discoverCursorModels() } : {}),
},
];
const capabilitiesResponse: AgentCapabilities = {
mode,
Expand Down Expand Up @@ -183,31 +220,45 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
if (spawnOptions?.cwd) jobOutputPaths.set(`${id}:cwd`, spawnOptions.cwd);
broadcast({ type: "job:started", job: { ...info } });

// --- Stdout capture (Claude JSONL streaming) ---
// --- Stdout capture (Claude/Cursor stream-json) ---
let stdoutBuf = "";
if (captureStdout && proc.stdout) {
// Format one complete JSONL line into a live-log delta (skip result
// events — handled in onJobComplete).
const emitLogLine = (line: string) => {
if (!line.trim()) return;
// Tour jobs with the Claude engine also stream Claude JSONL.
if (provider === "claude" || spawnOptions?.engine === "claude") {
const formatted = formatClaudeLogEvent(line);
if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
return;
}
// Cursor: map stream-json events (init/assistant/tool_call/result)
// into readable log deltas, applying the partial-output dedup rule.
if (provider === "cursor") {
const formatted = formatCursorLogEvent(line);
if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
return;
}
try {
const event = JSON.parse(line);
if (event.type === 'result') return;
} catch { /* not JSON — forward as raw log */ }
broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
};
// stream-json output is NDJSON and chunk boundaries are arbitrary —
// carry the trailing partial line until a later chunk completes it,
// otherwise records split across chunks are dropped from live logs.
let logLineCarry = "";
proc.stdout.on("data", (chunk: Buffer) => {
const text = chunk.toString();
stdoutBuf += text;

// Forward JSONL lines as log events
const lines = text.split('\n');
for (const line of lines) {
if (!line.trim()) continue;
// Tour jobs with the Claude engine also stream Claude JSONL.
if (provider === "claude" || spawnOptions?.engine === "claude") {
const formatted = formatClaudeLogEvent(line);
if (formatted !== null) {
broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
}
continue;
}
try {
const event = JSON.parse(line);
if (event.type === 'result') continue;
} catch { /* not JSON — forward as raw log */ }
broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
}
const lines = (logLineCarry + text).split('\n');
logLineCarry = lines.pop() ?? "";
for (const line of lines) emitLogLine(line);
});
proc.stdout.on("end", () => {
if (logLineCarry) emitLogLine(logLineCarry);
});
}

Expand Down Expand Up @@ -265,8 +316,15 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
stdout: captureStdout ? stdoutBuf : undefined,
cwd: jobCwd,
});
} catch {
// Result ingestion failure shouldn't prevent job completion broadcast
} catch (err) {
// Claude/Codex are fail-open; Cursor is fail-closed — an unexpected
// throw during prompt-enforced ingestion must fail the job, not pass
// it. (The Cursor handler normally fails by mutation and never throws;
// this guards future refactors.)
if (provider === "cursor") {
entry.info.status = "failed";
entry.info.error = err instanceof Error ? err.message : "Cursor result ingestion failed";
}
}
}
jobOutputPaths.delete(id);
Expand Down Expand Up @@ -418,6 +476,19 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
return true;
}

// Fail-closed enforcement for server-owned providers: the command MUST
// be built server-side. Client-supplied argv is never spawned for these
// providers — a null/throwing builder becomes an error, not a fallback.
if (SERVER_BUILT_PROVIDERS.has(provider)) {
if (!options.buildCommand) {
json(res, { error: `Provider ${provider} requires server-built command` }, 400);
return true;
}
// Discard any client-supplied argv so a null build cleanly hits the
// `command.length === 0` guard below instead of falling through.
command = [];
}

// Try server-side command building for known providers
let captureStdout = false;
let stdinPrompt: string | undefined;
Expand Down
60 changes: 60 additions & 0 deletions apps/pi-extension/server/serverReview.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ import {
transformClaudeFindings,
} from "../generated/claude-review.js";
import { createTourSession, TOUR_EMPTY_OUTPUT_ERROR } from "../generated/tour-review.js";
import {
CURSOR_REVIEW_PROMPT,
buildCursorCommand,
parseCursorStreamOutput,
transformCursorFindings,
} from "../generated/cursor-review.js";
import {
WorkspaceReviewSession,
type WorkspaceDiffType,
Expand Down Expand Up @@ -567,6 +573,17 @@ export async function startReviewServer(options: {
return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, effort, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
}

if (provider === "cursor") {
// Cursor has no schema flag — its marker-block output contract lives in
// CURSOR_REVIEW_PROMPT. captureStdout is required (the marker block comes
// back on stdout, like Claude). buildCursorCommand passes the prompt as
// the trailing argv arg and threads --workspace=cwd to match the spawn cwd.
const model = typeof config?.model === "string" && config.model ? config.model : undefined;
const prompt = CURSOR_REVIEW_PROMPT + "\n\n---\n\n" + userMessage;
const { command } = buildCursorCommand(prompt, model, cwd);
return { command, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
}

return null;
},

Expand Down Expand Up @@ -636,6 +653,49 @@ export async function startReviewServer(options: {
return;
}

// --- Cursor path ---
// FAIL-CLOSED: Cursor output is prompt-enforced (no schema flag), so any
// missing/malformed/schema/transform/insertion failure must MUTATE the job
// to failed rather than throw. agent-jobs.ts catches throws during result
// ingestion and fails Cursor jobs only as a backstop; mutating here keeps
// the specific error message. Mirrors the Tour fail-closed pattern below.
if (job.provider === "cursor") {
const output = meta.stdout ? parseCursorStreamOutput(meta.stdout) : null;
if (!output) {
job.status = "failed";
job.error = "Cursor review output missing or unparseable (no valid marker JSON).";
return;
}

// Derive the verdict from finding severities (like Claude) rather than
// trusting Cursor's free-form `correctness` string. Cursor has no schema
// flag, so a model value like "not correct" would be stored verbatim and
// the detail panel (any string containing "correct" except "incorrect" →
// green) would invert the displayed result.
const hasImportant = output.findings.some((f) => f.severity === "important");
job.summary = {
correctness: hasImportant ? "Issues Found" : "Correct",
explanation: output.summary.explanation,
confidence: output.summary.confidence,
};

if (output.findings.length > 0) {
const annotations = transformCursorFindings(
output.findings,
job.source,
cwd,
workspace ? (filePath) => workspace.normalizeAnnotationPath(filePath) : undefined,
)
.map(a => ({ ...a, ...jobPrContext, ...(jobDiffScope && { diffScope: jobDiffScope }) }));
const result = externalAnnotations.addAnnotations({ annotations });
if ("error" in result) {
job.status = "failed";
job.error = `Cursor annotation insertion failed: ${result.error}`;
return;
}
}
return;
}

if (job.provider === "tour") {
const { summary } = await tour.onJobComplete({ job, meta });
if (summary) {
Expand Down
2 changes: 1 addition & 1 deletion apps/pi-extension/vendor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ for f in feedback-templates prompts review-core diff-paths cli-pagination jj-cor
done

# Vendor review agent modules from packages/server/ — rewrite imports for generated/ layout
for f in agent-review-message codex-review claude-review path-utils; do
for f in agent-review-message codex-review claude-review cursor-review path-utils; do
src="../../packages/server/$f.ts"
printf '// @generated — DO NOT EDIT. Source: packages/server/%s.ts\n' "$f" | cat - "$src" \
| sed 's|from "./vcs"|from "./review-core.js"|' \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ function ProviderPill({ provider, engine, model }: { provider: string; engine?:
const engineLabel = engine === 'codex' ? 'Codex' : 'Claude';
label = model && engine !== 'codex' ? `Tour · ${engineLabel} ${model.charAt(0).toUpperCase() + model.slice(1)}` : `Tour · ${engineLabel}`;
} else {
label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : 'Shell';
label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : provider === 'cursor' ? 'Cursor' : 'Shell';
}
return (
<span className={`text-[10px] font-semibold uppercase tracking-wider px-1.5 py-0.5 rounded ${
Expand Down
Loading