From 9e82d417a699dd4e30912318415af105dd0c324a Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Sat, 13 Jun 2026 13:51:52 -0700
Subject: [PATCH 1/7] feat(review): add Cursor CLI as an experimental review
 agent provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds 'cursor' as a third background review provider alongside Claude, Codex,
and Code Tour, plugged into the existing /api/agents/jobs system (not Ask AI).

Design principle: match the existing providers, don't out-engineer them. Cursor's
only real difference is prose output (no schema flag), which earns strict PARSING
only — no extra isolation, grounding, or finding-policing.

Backend:
- packages/server/cursor-review.ts: command builder (read-only flags, prompt on
  stdin), line-buffered NDJSON stream reducer, static-marker last-block JSON
  extraction, schema validation, finding->annotation transform.
- agent-jobs.ts: capability detection (binary is 'agent', not 'cursor'),
  fail-closed server-built-provider guard (client argv is never spawned for
  built-in providers), cursor stdout-log dispatch.
- review.ts onJobComplete: fail-closed BY MUTATION (never throws — the runner
  swallows throws), PR/diffScope attribution, workspace path normalization.
- Full Pi parity (agent-jobs.ts, serverReview.ts, vendored via vendor.sh).

UI (capability-gated, invisible unless the Cursor CLI is installed):
- AgentsTab/useAgentSettings: Cursor as a review-only engine with an experimental
  label and Auto model default.

Read-only posture is flags-only: --mode ask + --sandbox enabled, no --force.
Env is inherited like Claude/Codex.

Tests: packages/server/cursor-review.test.ts (22, high-value only). Full
typecheck + affected review-agent test families pass.

OUTSTANDING (not automatable here): empirically verify 'agent -p --mode ask' is
non-mutating with an authenticated Cursor account before trusting in production.
Ship as experimental until confirmed.
---
 AGENTS.md                                |   2 +-
 apps/pi-extension/server/agent-jobs.ts   |  34 ++
 apps/pi-extension/server/serverReview.ts |  54 +++
 apps/pi-extension/vendor.sh              |   2 +-
 packages/server/agent-jobs.ts            |  37 ++
 packages/server/cursor-review.test.ts    | 308 ++++++++++++++
 packages/server/cursor-review.ts         | 515 +++++++++++++++++++++++
 packages/server/review.ts                |  54 +++
 packages/ui/components/AgentsTab.tsx     |  91 +++-
 packages/ui/hooks/useAgentSettings.ts    |  32 +-
 10 files changed, 1109 insertions(+), 20 deletions(-)
 create mode 100644 packages/server/cursor-review.test.ts
 create mode 100644 packages/server/cursor-review.ts

diff --git a/AGENTS.md b/AGENTS.md
index 3bde42b41..ba82fc5fc 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -311,7 +311,7 @@ During normal plan review, an Archive sidebar tab provides the same browsing via
 | `/api/external-annotations` | POST | Add external annotations (single or batch `{ annotations: [...] }`) |
 | `/api/external-annotations` | PATCH | Update fields on a single annotation (`?id=`) |
 | `/api/external-annotations` | DELETE | Remove by `?id=`, `?source=`, or clear all |
-| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour) |
+| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour, cursor) |
 | `/api/agents/jobs/stream` | GET | SSE stream for real-time agent job status updates |
 | `/api/agents/jobs` | GET | Snapshot of agent jobs (polling fallback, `?since=N` for version gating) |
 | `/api/agents/jobs` | POST | Launch an agent job (body: `{ provider, command, label }`) |
diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts
index d4a492402..696c56212 100644
--- a/apps/pi-extension/server/agent-jobs.ts
+++ b/apps/pi-extension/server/agent-jobs.ts
@@ -21,6 +21,7 @@ import {
 	AGENT_HEARTBEAT_INTERVAL_MS,
 } from "../generated/agent-jobs.js";
 import { formatClaudeLogEvent } from "../generated/claude-review.js";
+import { formatCursorLogEvent } from "../generated/cursor-review.js";
 import { json, parseBody } from "./helpers.js";
 
 // ---------------------------------------------------------------------------
@@ -32,6 +33,15 @@ const JOBS = `${BASE}/jobs`;
 const JOBS_STREAM = `${JOBS}/stream`;
 const CAPABILITIES = `${BASE}/capabilities`;
 
+// Providers whose command is owned by the server. Client-supplied argv is never
+// spawned for these — buildCommand must produce the command or the launch fails.
+const SERVER_BUILT_PROVIDERS: ReadonlySet<string> = new Set([
+	"claude",
+	"codex",
+	"tour",
+	"cursor",
+]);
+
 // ---------------------------------------------------------------------------
 // which() helper for Node.js
 // ---------------------------------------------------------------------------
@@ -98,6 +108,8 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 		{ id: "claude", name: "Claude Code", available: whichCmd("claude") },
 		{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
 		{ id: "tour", name: "Code Tour", available: whichCmd("claude") || whichCmd("codex") },
+		// Cursor CLI's binary is literally named `agent` (NOT `cursor`).
+		{ id: "cursor", name: "Cursor CLI", available: whichCmd("agent") },
 	];
 	const capabilitiesResponse: AgentCapabilities = {
 		mode,
@@ -202,6 +214,15 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 							}
 							continue;
 						}
+						// Cursor: map stream-json events (init/assistant/tool_call/result)
+						// into readable log deltas, applying the partial-output dedup rule.
+						if (provider === "cursor") {
+							const formatted = formatCursorLogEvent(line);
+							if (formatted !== null) {
+								broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
+							}
+							continue;
+						}
 						try {
 							const event = JSON.parse(line);
 							if (event.type === 'result') continue;
@@ -418,6 +439,19 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 						return true;
 					}
 
+					// Fail-closed enforcement for server-owned providers: the command MUST
+					// be built server-side. Client-supplied argv is never spawned for these
+					// providers — a null/throwing builder becomes an error, not a fallback.
+					if (SERVER_BUILT_PROVIDERS.has(provider)) {
+						if (!options.buildCommand) {
+							json(res, { error: `Provider ${provider} requires server-built command` }, 400);
+							return true;
+						}
+						// Discard any client-supplied argv so a null build cleanly hits the
+						// `command.length === 0` guard below instead of falling through.
+						command = [];
+					}
+
 					// Try server-side command building for known providers
 					let captureStdout = false;
 					let stdinPrompt: string | undefined;
diff --git a/apps/pi-extension/server/serverReview.ts b/apps/pi-extension/server/serverReview.ts
index 94598038d..20a8d7282 100644
--- a/apps/pi-extension/server/serverReview.ts
+++ b/apps/pi-extension/server/serverReview.ts
@@ -90,6 +90,12 @@ import {
 	transformClaudeFindings,
 } from "../generated/claude-review.js";
 import { createTourSession, TOUR_EMPTY_OUTPUT_ERROR } from "../generated/tour-review.js";
+import {
+	CURSOR_REVIEW_PROMPT,
+	buildCursorCommand,
+	parseCursorStreamOutput,
+	transformCursorFindings,
+} from "../generated/cursor-review.js";
 import {
 	WorkspaceReviewSession,
 	type WorkspaceDiffType,
@@ -567,6 +573,17 @@ export async function startReviewServer(options: {
 				return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, effort, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
 			}
 
+			if (provider === "cursor") {
+				// Cursor has no schema flag — its marker-block output contract lives in
+				// CURSOR_REVIEW_PROMPT. captureStdout is required (the marker block comes
+				// back on stdout, like Claude). buildCursorCommand puts the prompt on
+				// stdin and threads --workspace=cwd to match the spawn cwd.
+				const model = typeof config?.model === "string" && config.model ? config.model : undefined;
+				const prompt = CURSOR_REVIEW_PROMPT + "\n\n---\n\n" + userMessage;
+				const { command, stdinPrompt } = buildCursorCommand(prompt, model, cwd);
+				return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
+			}
+
 			return null;
 		},
 
@@ -636,6 +653,43 @@ export async function startReviewServer(options: {
 				return;
 			}
 
+			// --- Cursor path ---
+			// FAIL-CLOSED: Cursor output is prompt-enforced (no schema flag), so any
+			// missing/malformed/schema/transform/insertion failure must MUTATE the job
+			// to failed — NEVER throw (agent-jobs.ts swallows throws, silently leaving
+			// an exit-0 job marked done). Mirrors the Tour fail-closed pattern below.
+			if (job.provider === "cursor") {
+				const output = meta.stdout ? parseCursorStreamOutput(meta.stdout) : null;
+				if (!output) {
+					job.status = "failed";
+					job.error = "Cursor review output missing or unparseable (no valid marker JSON).";
+					return;
+				}
+
+				job.summary = {
+					correctness: output.summary.correctness,
+					explanation: output.summary.explanation,
+					confidence: output.summary.confidence,
+				};
+
+				if (output.findings.length > 0) {
+					const annotations = transformCursorFindings(
+						output.findings,
+						job.source,
+						cwd,
+						workspace ? (filePath) => workspace.normalizeAnnotationPath(filePath) : undefined,
+					)
+						.map(a => ({ ...a, ...jobPrContext, ...(jobDiffScope && { diffScope: jobDiffScope }) }));
+					const result = externalAnnotations.addAnnotations({ annotations });
+					if ("error" in result) {
+						job.status = "failed";
+						job.error = `Cursor annotation insertion failed: ${result.error}`;
+						return;
+					}
+				}
+				return;
+			}
+
 			if (job.provider === "tour") {
 				const { summary } = await tour.onJobComplete({ job, meta });
 				if (summary) {
diff --git a/apps/pi-extension/vendor.sh b/apps/pi-extension/vendor.sh
index 5c9a32544..e0ecadfdc 100755
--- a/apps/pi-extension/vendor.sh
+++ b/apps/pi-extension/vendor.sh
@@ -13,7 +13,7 @@ for f in feedback-templates prompts review-core diff-paths cli-pagination jj-cor
 done
 
 # Vendor review agent modules from packages/server/ — rewrite imports for generated/ layout
-for f in agent-review-message codex-review claude-review path-utils; do
+for f in agent-review-message codex-review claude-review cursor-review path-utils; do
   src="../../packages/server/$f.ts"
   printf '// @generated — DO NOT EDIT. Source: packages/server/%s.ts\n' "$f" | cat - "$src" \
     | sed 's|from "./vcs"|from "./review-core.js"|' \
diff --git a/packages/server/agent-jobs.ts b/packages/server/agent-jobs.ts
index 9a1424415..c6ee893bf 100644
--- a/packages/server/agent-jobs.ts
+++ b/packages/server/agent-jobs.ts
@@ -9,6 +9,7 @@
  */
 
 import { formatClaudeLogEvent } from "./claude-review";
+import { formatCursorLogEvent } from "./cursor-review";
 import {
   type AgentJobInfo,
   type AgentJobEvent,
@@ -46,6 +47,15 @@ const JOBS = `${BASE}/jobs`;
 const JOBS_STREAM = `${JOBS}/stream`;
 const CAPABILITIES = `${BASE}/capabilities`;
 
+// Providers whose command is owned by the server. Client-supplied argv is never
+// spawned for these — buildCommand must produce the command or the launch fails.
+const SERVER_BUILT_PROVIDERS: ReadonlySet<string> = new Set([
+  "claude",
+  "codex",
+  "tour",
+  "cursor",
+]);
+
 // ---------------------------------------------------------------------------
 // Factory
 // ---------------------------------------------------------------------------
@@ -110,6 +120,8 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
     { id: "claude", name: "Claude Code", available: !!Bun.which("claude") },
     { id: "codex", name: "Codex CLI", available: !!Bun.which("codex") },
     { id: "tour", name: "Code Tour", available: !!Bun.which("claude") || !!Bun.which("codex") },
+    // Cursor CLI's binary is literally named `agent` (NOT `cursor`).
+    { id: "cursor", name: "Cursor CLI", available: !!Bun.which("agent") },
   ];
   const capabilitiesResponse: AgentCapabilities = {
     mode,
@@ -254,6 +266,15 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
                     }
                     continue;
                   }
+                  // Cursor: map stream-json events (init/assistant/tool_call/result)
+                  // into readable log deltas, applying the partial-output dedup rule.
+                  if (provider === "cursor") {
+                    const formatted = formatCursorLogEvent(line);
+                    if (formatted !== null) {
+                      broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
+                    }
+                    continue;
+                  }
                   try {
                     const event = JSON.parse(line);
                     if (event.type === 'result') continue; // handled in onJobComplete
@@ -444,6 +465,22 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
             );
           }
 
+          // Fail-closed enforcement for server-owned providers: the command MUST
+          // be built server-side. Client-supplied argv is never spawned for these
+          // providers — a null/throwing builder becomes an error, not a fallback.
+          const isServerBuiltProvider = SERVER_BUILT_PROVIDERS.has(provider);
+          if (isServerBuiltProvider) {
+            if (!options.buildCommand) {
+              return Response.json(
+                { error: `Provider ${provider} requires server-built command` },
+                { status: 400 },
+              );
+            }
+            // Discard any client-supplied argv so a null build cleanly hits the
+            // `command.length === 0` guard below instead of falling through.
+            command = [];
+          }
+
           // Try server-side command building for known providers
           let captureStdout = false;
           let stdinPrompt: string | undefined;
diff --git a/packages/server/cursor-review.test.ts b/packages/server/cursor-review.test.ts
new file mode 100644
index 000000000..bd724037d
--- /dev/null
+++ b/packages/server/cursor-review.test.ts
@@ -0,0 +1,308 @@
+import { describe, expect, test } from "bun:test";
+import {
+  CURSOR_MARKER_OPEN,
+  CURSOR_MARKER_CLOSE,
+  buildCursorCommand,
+  reduceCursorStreamEvents,
+  parseCursorStreamOutput,
+  formatCursorLogEvent,
+  transformCursorFindings,
+  validateCursorReviewOutput,
+  type CursorFinding,
+  type CursorReviewOutput,
+} from "./cursor-review";
+
+// ---------------------------------------------------------------------------
+// Helpers — build NDJSON the way Cursor `stream-json` does.
+// ---------------------------------------------------------------------------
+
+/** Wrap a JSON object as one NDJSON record (line + newline). */
+function ndjson(obj: unknown): string {
+  return JSON.stringify(obj) + "\n";
+}
+
+/** A real partial-output assistant delta: timestamp_ms present, model_call_id absent. */
+function assistantDelta(text: string, ts = 1): string {
+  return ndjson({ type: "assistant", timestamp_ms: ts, message: { content: text } });
+}
+
+/** A duplicate assistant flush: model_call_id present → must be skipped. */
+function assistantDuplicate(text: string, ts = 1): string {
+  return ndjson({ type: "assistant", timestamp_ms: ts, model_call_id: "call_1", message: { content: text } });
+}
+
+/** Wrap review JSON in the marker block. */
+function markerBlock(payload: unknown): string {
+  return `${CURSOR_MARKER_OPEN}\n${JSON.stringify(payload, null, 2)}\n${CURSOR_MARKER_CLOSE}`;
+}
+
+const validReview: CursorReviewOutput = {
+  findings: [
+    {
+      file: "packages/server/review.ts",
+      line: 123,
+      end_line: 130,
+      severity: "important",
+      description: "Null deref on the unhappy path.",
+      reasoning: "The guard above only covers the happy path.",
+    },
+  ],
+  summary: { correctness: "Issues Found", explanation: "One important issue.", confidence: 0.85 },
+};
+
+/** Split a string into N near-equal chunks to simulate arbitrary pipe boundaries. */
+function chunkify(s: string, parts: number): string[] {
+  const size = Math.ceil(s.length / parts);
+  const out: string[] = [];
+  for (let i = 0; i < s.length; i += size) out.push(s.slice(i, i + size));
+  return out;
+}
+
+// ---------------------------------------------------------------------------
+// Command construction
+// ---------------------------------------------------------------------------
+
+describe("buildCursorCommand", () => {
+  test("uses the `agent` binary and read-only flags, prompt on stdin", () => {
+    const { command, stdinPrompt } = buildCursorCommand("review this", "gpt-5", "/repo");
+    expect(command[0]).toBe("agent");
+    expect(command).toContain("-p");
+    // Read-only posture: --mode ask + --sandbox enabled, NO --force/--yolo.
+    expect(command).toContain("--mode");
+    expect(command[command.indexOf("--mode") + 1]).toBe("ask");
+    expect(command).toContain("--sandbox");
+    expect(command[command.indexOf("--sandbox") + 1]).toBe("enabled");
+    expect(command).not.toContain("--force");
+    expect(command).not.toContain("--yolo");
+    // stream-json + partial output for live logs.
+    expect(command).toContain("--output-format");
+    expect(command[command.indexOf("--output-format") + 1]).toBe("stream-json");
+    expect(command).toContain("--stream-partial-output");
+    // workspace matches launch cwd; model present.
+    expect(command[command.indexOf("--workspace") + 1]).toBe("/repo");
+    expect(command[command.indexOf("--model") + 1]).toBe("gpt-5");
+    // Prompt is NOT in argv — it goes on stdin.
+    expect(stdinPrompt).toBe("review this");
+    expect(command).not.toContain("review this");
+  });
+
+  test("omits --model when model is Auto or empty", () => {
+    expect(buildCursorCommand("p", "Auto", "/repo").command).not.toContain("--model");
+    expect(buildCursorCommand("p", "", "/repo").command).not.toContain("--model");
+    expect(buildCursorCommand("p", undefined, "/repo").command).not.toContain("--model");
+  });
+
+  test("omits --workspace when no cwd is provided", () => {
+    expect(buildCursorCommand("p").command).not.toContain("--workspace");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// NDJSON reduction — line buffering across arbitrary chunk boundaries
+// ---------------------------------------------------------------------------
+
+describe("reduceCursorStreamEvents", () => {
+  test("reconstructs canonical text regardless of how the buffer is chunked", () => {
+    const stdout =
+      ndjson({ type: "system", subtype: "init", model: "Auto" }) +
+      assistantDelta("Looking at ") +
+      assistantDelta("the diff.") +
+      ndjson({ type: "result", result: " Done." });
+
+    // Whole buffer (the accumulated stdout is what onJobComplete parses).
+    const whole = reduceCursorStreamEvents(stdout);
+    expect(whole.canonicalText).toBe("Looking at the diff. Done.");
+
+    // Re-joining arbitrary chunk splits must yield identical canonical text —
+    // proves pipe chunk boundaries never corrupt a record.
+    for (const parts of [2, 3, 7, stdout.length]) {
+      const rejoined = chunkify(stdout, parts).join("");
+      expect(reduceCursorStreamEvents(rejoined).canonicalText).toBe("Looking at the diff. Done.");
+    }
+  });
+
+  test("filters duplicate partial-output flushes (model_call_id present)", () => {
+    const stdout =
+      assistantDelta("real ") +
+      assistantDuplicate("DUPLICATE") + // skipped: model_call_id present
+      assistantDelta("text");
+    const { canonicalText } = reduceCursorStreamEvents(stdout);
+    expect(canonicalText).toBe("real text");
+    expect(canonicalText).not.toContain("DUPLICATE");
+  });
+
+  test("ignores malformed lines without aborting the reduction", () => {
+    const stdout = assistantDelta("a") + "this is not json\n" + assistantDelta("b");
+    const { canonicalText, recordCount } = reduceCursorStreamEvents(stdout);
+    expect(canonicalText).toBe("ab");
+    expect(recordCount).toBe(2); // malformed line not counted
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Marker extraction — last block wins
+// ---------------------------------------------------------------------------
+
+describe("parseCursorStreamOutput", () => {
+  test("extracts the LAST complete marker block from multi-message output", () => {
+    // An earlier (stale/draft) block followed by the final authoritative one.
+    const staleReview = { findings: [], summary: { correctness: "Correct", explanation: "draft", confidence: 0.1 } };
+    const stdout =
+      assistantDelta("Here is a draft:\n" + markerBlock(staleReview) + "\n") +
+      assistantDelta("On reflection:\n" + markerBlock(validReview));
+
+    const out = parseCursorStreamOutput(stdout);
+    expect(out).not.toBeNull();
+    expect(out!.summary.correctness).toBe("Issues Found");
+    expect(out!.findings).toHaveLength(1);
+    expect(out!.findings[0].file).toBe("packages/server/review.ts");
+  });
+
+  test("parses a single marker block spread across NDJSON records and chunks", () => {
+    const full = "Commentary first.\n" + markerBlock(validReview);
+    // Split the marker text itself across multiple assistant deltas.
+    const half = Math.floor(full.length / 2);
+    const stdout = assistantDelta(full.slice(0, half)) + assistantDelta(full.slice(half));
+    // Then chunk the raw stdout at arbitrary byte boundaries.
+    const rejoined = chunkify(stdout, 9).join("");
+    const out = parseCursorStreamOutput(rejoined);
+    expect(out).not.toBeNull();
+    expect(out!.findings).toHaveLength(1);
+  });
+
+  test("returns null when the marker block is missing", () => {
+    const stdout = assistantDelta("I found no issues. Everything looks correct.");
+    expect(parseCursorStreamOutput(stdout)).toBeNull();
+  });
+
+  test("returns null when the marker block is opened but never closed", () => {
+    const stdout = assistantDelta(`prose ${CURSOR_MARKER_OPEN}\n{"findings":[]`);
+    expect(parseCursorStreamOutput(stdout)).toBeNull();
+  });
+
+  test("returns null when the marker block contains malformed JSON", () => {
+    const stdout = assistantDelta(`${CURSOR_MARKER_OPEN}\n{ findings: [ }\n${CURSOR_MARKER_CLOSE}`);
+    expect(parseCursorStreamOutput(stdout)).toBeNull();
+  });
+
+  test("returns null when JSON is valid but schema-invalid", () => {
+    // Missing summary.confidence (and bad severity) — must fail validation.
+    const bad = {
+      findings: [{ file: "x.ts", line: 1, severity: "blocker", description: "d" }],
+      summary: { correctness: "Issues Found", explanation: "e" },
+    };
+    const stdout = assistantDelta(markerBlock(bad));
+    expect(parseCursorStreamOutput(stdout)).toBeNull();
+  });
+
+  test("returns null on empty stdout", () => {
+    expect(parseCursorStreamOutput("")).toBeNull();
+    expect(parseCursorStreamOutput("   \n  ")).toBeNull();
+  });
+
+  test("accepts an empty finding set with a valid summary", () => {
+    const clean = { findings: [], summary: { correctness: "Correct", explanation: "No issues.", confidence: 1 } };
+    const out = parseCursorStreamOutput(assistantDelta(markerBlock(clean)));
+    expect(out).not.toBeNull();
+    expect(out!.findings).toHaveLength(0);
+    expect(out!.summary.correctness).toBe("Correct");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Schema validator (direct)
+// ---------------------------------------------------------------------------
+
+describe("validateCursorReviewOutput", () => {
+  test("defaults end_line to line and reasoning to empty string", () => {
+    const out = validateCursorReviewOutput({
+      findings: [{ file: "a.ts", line: 5, severity: "nit", description: "d" }],
+      summary: { correctness: "Issues Found", explanation: "e", confidence: 0.5 },
+    });
+    expect(out).not.toBeNull();
+    expect(out!.findings[0].end_line).toBe(5);
+    expect(out!.findings[0].reasoning).toBe("");
+  });
+
+  test("rejects non-object, missing findings array, and bad severity", () => {
+    expect(validateCursorReviewOutput(null)).toBeNull();
+    expect(validateCursorReviewOutput({ summary: { correctness: "c", explanation: "e", confidence: 1 } })).toBeNull();
+    expect(
+      validateCursorReviewOutput({
+        findings: [{ file: "a.ts", line: 1, severity: "high", description: "d" }],
+        summary: { correctness: "c", explanation: "e", confidence: 1 },
+      }),
+    ).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Log formatting
+// ---------------------------------------------------------------------------
+
+describe("formatCursorLogEvent", () => {
+  test("maps init / assistant-delta / tool_call / result and skips noise", () => {
+    expect(formatCursorLogEvent(ndjson({ type: "system", subtype: "init", model: "Auto", session_id: "s1" }))).toContain("model=Auto");
+    expect(formatCursorLogEvent(assistantDelta("hello").trim())).toBe("hello");
+    expect(formatCursorLogEvent(ndjson({ type: "tool_call", subtype: "started", name: "read_file", args: { path: "x.ts" } }))).toContain("[read_file]");
+    expect(formatCursorLogEvent(ndjson({ type: "tool_call", subtype: "completed", name: "read_file" }))).toBe("[read_file] completed");
+    expect(formatCursorLogEvent(ndjson({ type: "result", duration_ms: 42, request_id: "r1" }))).toContain("42ms");
+  });
+
+  test("skips duplicate assistant flushes and malformed lines", () => {
+    expect(formatCursorLogEvent(assistantDuplicate("dup").trim())).toBeNull();
+    expect(formatCursorLogEvent("not json")).toBeNull();
+    expect(formatCursorLogEvent(ndjson({ type: "user" }).trim())).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Finding → annotation transform
+// ---------------------------------------------------------------------------
+
+describe("transformCursorFindings", () => {
+  const findings: CursorFinding[] = [
+    {
+      file: "/repo/packages/server/review.ts",
+      line: 10,
+      end_line: 12,
+      severity: "important",
+      description: "Off-by-one in the loop bound.",
+      reasoning: "Index reaches length, overrunning the array.",
+    },
+  ];
+
+  test("produces the shared annotation shape with author Cursor and severity-prefixed text", () => {
+    const [a] = transformCursorFindings(findings, "agent-abcd1234", "/repo");
+    expect(a.source).toBe("agent-abcd1234");
+    expect(a.filePath).toBe("packages/server/review.ts"); // relativized to cwd
+    expect(a.lineStart).toBe(10);
+    expect(a.lineEnd).toBe(12);
+    expect(a.side).toBe("new");
+    expect(a.scope).toBe("line");
+    expect(a.type).toBe("comment");
+    expect(a.author).toBe("Cursor");
+    expect(a.severity).toBe("important");
+    expect(a.text).toBe("[important] Off-by-one in the loop bound.");
+    expect(a.reasoning).toBe("Index reaches length, overrunning the array.");
+  });
+
+  test("applies the workspace path transform when provided", () => {
+    const [a] = transformCursorFindings(findings, "src", "/repo", (p) => `child/${p}`);
+    expect(a.filePath).toBe("child/packages/server/review.ts");
+  });
+
+  test("drops findings without a file or numeric line", () => {
+    const dirty = [
+      ...findings,
+      { file: "", line: 1, end_line: 1, severity: "nit", description: "x", reasoning: "" } as CursorFinding,
+      { file: "y.ts", line: undefined as unknown as number, end_line: 1, severity: "nit", description: "x", reasoning: "" } as CursorFinding,
+    ];
+    expect(transformCursorFindings(dirty, "src", "/repo")).toHaveLength(1);
+  });
+
+  test("defaults lineEnd to lineStart when end_line is absent", () => {
+    const f = [{ file: "/repo/a.ts", line: 7, severity: "nit", description: "d", reasoning: "" } as unknown as CursorFinding];
+    expect(transformCursorFindings(f, "src", "/repo")[0].lineEnd).toBe(7);
+  });
+});
diff --git a/packages/server/cursor-review.ts b/packages/server/cursor-review.ts
new file mode 100644
index 000000000..fe6f4526f
--- /dev/null
+++ b/packages/server/cursor-review.ts
@@ -0,0 +1,515 @@
+import { toRelativePath } from "./path-utils";
+
+/**
+ * Cursor CLI Review Agent — prompt, command builder, NDJSON stream reducer,
+ * marker-block parser, and finding transformer.
+ *
+ * Cursor differs from Claude/Codex in exactly one way: its headless CLI
+ * (the binary is literally named `agent`) exposes no schema-validation flag
+ * (`--json-schema` / `--output-schema` / `structured_output`). Its final task
+ * output is prose. That earns strict *parsing* — Cursor is told to emit a
+ * marker-delimited JSON block, and we extract the LAST complete block from the
+ * reconstructed canonical text. It does NOT earn extra isolation,
+ * finding-policing, env scrubbing, or a per-job marker nonce. Cursor inherits
+ * process.env like Claude/Codex; its read-only posture comes only from the
+ * command flags (`--mode ask` + `--sandbox enabled` + no `--force`).
+ *
+ * Cursor `stream-json` is decoded with a real line-buffered NDJSON reducer
+ * (pipe chunk boundaries are NOT NDJSON record boundaries), producing live log
+ * deltas and the canonical assistant/result text used for marker extraction.
+ */
+
+// ---------------------------------------------------------------------------
+// Static marker — v1 uses a STATIC marker plus last-block selection (no nonce).
+// If testing shows the model echoing the marker from the prompt, add a per-job
+// nonce then — not before (per approach doc §Structured Output Policy).
+// ---------------------------------------------------------------------------
+
+export const CURSOR_MARKER_OPEN = "<plannotator-review-json>";
+export const CURSOR_MARKER_CLOSE = "</plannotator-review-json>";
+
+// ---------------------------------------------------------------------------
+// Types — mirror ClaudeFinding/ClaudeReviewOutput, plus a freeform summary.
+// ---------------------------------------------------------------------------
+
+export type CursorSeverity = "important" | "nit" | "pre_existing";
+
+export interface CursorFinding {
+  file: string;
+  line: number;
+  end_line: number;
+  severity: CursorSeverity;
+  description: string;
+  reasoning: string;
+}
+
+export interface CursorReviewSummary {
+  correctness: string;
+  explanation: string;
+  confidence: number;
+}
+
+export interface CursorReviewOutput {
+  findings: CursorFinding[];
+  summary: CursorReviewSummary;
+}
+
+const VALID_SEVERITIES: ReadonlySet<string> = new Set([
+  "important",
+  "nit",
+  "pre_existing",
+]);
+
+// ---------------------------------------------------------------------------
+// Schema validator — hand-rolled (no Ajv): Cursor output is prompt-enforced,
+// so validation is the floor that turns prose back into a trusted object.
+// ---------------------------------------------------------------------------
+
+/**
+ * Validate a parsed object against the Cursor review schema.
+ * Returns the typed output, or null if the shape is invalid.
+ *
+ * Empty findings are valid as long as the JSON itself is valid and carries a
+ * valid summary (per approach doc §Structured Output Policy).
+ */
+export function validateCursorReviewOutput(parsed: unknown): CursorReviewOutput | null {
+  if (!parsed || typeof parsed !== "object") return null;
+  const obj = parsed as Record<string, unknown>;
+
+  if (!Array.isArray(obj.findings)) return null;
+
+  const summary = obj.summary;
+  if (!summary || typeof summary !== "object") return null;
+  const s = summary as Record<string, unknown>;
+  if (typeof s.correctness !== "string") return null;
+  if (typeof s.explanation !== "string") return null;
+  if (typeof s.confidence !== "number") return null;
+
+  const findings: CursorFinding[] = [];
+  for (const raw of obj.findings) {
+    if (!raw || typeof raw !== "object") return null;
+    const f = raw as Record<string, unknown>;
+    if (typeof f.file !== "string") return null;
+    if (typeof f.line !== "number") return null;
+    if (typeof f.severity !== "string" || !VALID_SEVERITIES.has(f.severity)) return null;
+    if (typeof f.description !== "string") return null;
+
+    findings.push({
+      file: f.file,
+      line: f.line,
+      end_line: typeof f.end_line === "number" ? f.end_line : f.line,
+      severity: f.severity as CursorSeverity,
+      description: f.description,
+      reasoning: typeof f.reasoning === "string" ? f.reasoning : "",
+    });
+  }
+
+  return {
+    findings,
+    summary: {
+      correctness: s.correctness,
+      explanation: s.explanation,
+      confidence: s.confidence,
+    },
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Review prompt — mirrors CLAUDE_REVIEW_PROMPT, ending with the marker-block
+// output contract (Cursor has no schema flag, so the contract is the prompt).
+// ---------------------------------------------------------------------------
+
+export const CURSOR_REVIEW_PROMPT = `# Cursor Code Review System Prompt
+
+## Identity
+You are a code review system. Your job is to find bugs that would break
+production. You are not a linter, formatter, or style checker unless
+project guidance files explicitly expand your scope.
+
+## Pipeline
+
+Step 1: Gather context
+  - Retrieve the PR diff or local diff (gh pr diff, git diff, or jj diff)
+  - Read CLAUDE.md and REVIEW.md at the repo root and in every directory
+    containing modified files
+  - Build a map of which rules apply to which file paths
+  - Identify any skip rules (paths, patterns, or file types to ignore)
+
+Step 2: Review for issues
+  - Logic errors, regressions, broken edge cases, build failures, and code
+    that will produce wrong results.
+  - Security vulnerabilities with concrete exploit paths, race conditions, and
+    incorrect assumptions about trust boundaries.
+  - Code quality: unnecessary duplication, missed reuse of existing utilities,
+    overly complex implementations a senior engineer would care about.
+  - Guideline compliance: clear, unambiguous violations of CLAUDE.md / REVIEW.md
+    where you can cite the exact rule broken. Respect all skip rules.
+
+Step 3: Validate each candidate finding
+  - Trace the actual code path to confirm the issue is real.
+  - Check whether the issue is handled elsewhere (try/catch, upstream guard,
+    fallback logic, type system guarantees).
+  - If validation fails, drop the finding silently.
+  - If validation passes, write a clear reasoning chain — this becomes the
+    \`reasoning\` field.
+
+Step 4: Classify each validated finding with exactly one severity:
+
+  important — A bug that should be fixed before merging. Build failures, clear
+    logic errors, security vulnerabilities with exploit paths, data loss risks,
+    race conditions with observable consequences.
+
+  nit — A minor issue worth fixing but non-blocking. Style deviations from
+    project guidelines, code quality concerns, unlikely-but-worth-noting edge
+    cases, convention violations that don't affect correctness.
+
+  pre_existing — A bug that exists in the surrounding codebase but was NOT
+    introduced by this change. Only flag when directly relevant to the changed
+    code path.
+
+## Hard constraints
+- Never approve or block the change.
+- Never comment on formatting or code style unless guidance files say to.
+- Never flag missing test coverage unless guidance files say to.
+- Never invent rules — only enforce what CLAUDE.md or REVIEW.md state.
+- Prefer silence over false positives — when in doubt, drop the finding.
+- Do NOT modify files. This is a read-only review.
+- Do NOT post any comments to GitHub or GitLab. Do NOT use gh pr comment, glab,
+  or any commenting tool.
+
+## Output contract
+Your only machine-readable output is a single marker-delimited JSON block.
+Any natural-language commentary you write must come BEFORE the final marker
+block. Emit the block exactly once, as the last thing in your response:
+
+${CURSOR_MARKER_OPEN}
+{
+  "findings": [
+    {
+      "file": "packages/server/review.ts",
+      "line": 123,
+      "end_line": 123,
+      "severity": "important",
+      "description": "The issue...",
+      "reasoning": "Why this is a real issue..."
+    }
+  ],
+  "summary": {
+    "correctness": "Issues Found",
+    "explanation": "One important issue was found.",
+    "confidence": 0.85
+  }
+}
+${CURSOR_MARKER_CLOSE}
+
+Schema:
+- findings: array of objects, each with
+  - file: string (path as shown in the diff)
+  - line: integer (start line, post-change numbering)
+  - end_line: integer (end line; equal to line for a single line)
+  - severity: one of "important", "nit", "pre_existing"
+  - description: string (one paragraph)
+  - reasoning: string (how the issue was confirmed)
+- summary: object with
+  - correctness: string ("Correct" or "Issues Found")
+  - explanation: string (one sentence)
+  - confidence: number between 0 and 1
+
+If no issues are found, return an empty "findings" array with a valid summary.`;
+
+// ---------------------------------------------------------------------------
+// Command builder
+// ---------------------------------------------------------------------------
+
+export interface CursorCommandResult {
+  command: string[];
+  /** Prompt text written to stdin (preferred over argv, like Claude). */
+  stdinPrompt: string;
+}
+
+/**
+ * Build the `agent -p` command. NOTE the binary is `agent`, NOT `cursor`.
+ *
+ * Read-only posture comes entirely from `--mode ask` + `--sandbox enabled` and
+ * the absence of `--force`/`--yolo`. The prompt goes on stdin (preferred over
+ * argv — avoids quoting issues and argv size limits). `--model` is omitted when
+ * the model is `Auto`/empty so Cursor uses its default model selection.
+ * `--workspace` is set to the launch cwd when provided, matching the spawn cwd.
+ */
+export function buildCursorCommand(prompt: string, model?: string, cwd?: string): CursorCommandResult {
+  const useModel = !!model && model !== "Auto";
+
+  return {
+    command: [
+      "agent",
+      "-p",
+      "--mode",
+      "ask",
+      "--output-format",
+      "stream-json",
+      "--stream-partial-output",
+      ...(cwd ? ["--workspace", cwd] : []),
+      "--sandbox",
+      "enabled",
+      ...(useModel ? ["--model", model] : []),
+    ],
+    stdinPrompt: prompt,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Stream reduction — line-buffered NDJSON reducer.
+// ---------------------------------------------------------------------------
+
+interface CursorStreamEvent {
+  type?: string;
+  subtype?: string;
+  timestamp_ms?: number;
+  model_call_id?: string;
+  message?: { content?: unknown };
+  text?: string;
+  [key: string]: unknown;
+}
+
+/**
+ * A partial-output assistant event is a real new text delta only when
+ * `timestamp_ms` is present AND `model_call_id` is absent. All other assistant
+ * flushes are duplicate re-emissions (per approach doc §Log Streaming).
+ */
+function isRealAssistantDelta(event: CursorStreamEvent): boolean {
+  return event.timestamp_ms !== undefined && event.model_call_id === undefined;
+}
+
+/** Pull readable text out of an assistant event's content (string or parts). */
+function extractAssistantText(event: CursorStreamEvent): string {
+  if (typeof event.text === "string") return event.text;
+  const content = event.message?.content;
+  if (typeof content === "string") return content;
+  if (Array.isArray(content)) {
+    return content
+      .filter((p): p is { type?: string; text?: string } => !!p && typeof p === "object")
+      .filter((p) => p.type === "text" && typeof p.text === "string")
+      .map((p) => p.text as string)
+      .join("");
+  }
+  return "";
+}
+
+export interface CursorStreamReduction {
+  /** Canonical assistant/result text — the substrate for marker extraction. */
+  canonicalText: string;
+  /** Number of NDJSON records that parsed successfully. */
+  recordCount: number;
+}
+
+/**
+ * Reduce a complete Cursor `stream-json` stdout buffer into canonical text.
+ *
+ * Critically line-buffered: only complete lines (terminated by `\n`) are parsed.
+ * A trailing partial line with no newline is treated as complete (the process
+ * has exited by the time this runs on the accumulated buffer), but mid-stream
+ * chunk boundaries never corrupt a record because we join the whole buffer
+ * before splitting on newlines.
+ */
+export function reduceCursorStreamEvents(stdout: string): CursorStreamReduction {
+  let canonicalText = "";
+  let recordCount = 0;
+
+  if (!stdout) return { canonicalText, recordCount };
+
+  for (const rawLine of stdout.split("\n")) {
+    const line = rawLine.trim();
+    if (!line) continue;
+
+    let event: CursorStreamEvent;
+    try {
+      event = JSON.parse(line) as CursorStreamEvent;
+    } catch {
+      continue; // not a complete/valid NDJSON record — skip
+    }
+    recordCount++;
+
+    if (event.type === "assistant") {
+      // For partial streaming, only append real new deltas. If no event in the
+      // stream carries timestamp_ms (partial streaming disabled), append the
+      // full assistant text once.
+      if (event.timestamp_ms !== undefined) {
+        if (isRealAssistantDelta(event)) canonicalText += extractAssistantText(event);
+      } else if (event.model_call_id === undefined) {
+        canonicalText += extractAssistantText(event);
+      }
+    } else if (event.type === "result") {
+      const resultText =
+        typeof event.result === "string"
+          ? event.result
+          : typeof event.text === "string"
+            ? event.text
+            : "";
+      if (resultText) canonicalText += resultText;
+    }
+  }
+
+  return { canonicalText, recordCount };
+}
+
+// ---------------------------------------------------------------------------
+// Marker-block parsing — reduce → take the LAST complete marker block → parse →
+// schema-validate. Returns null on any failure (caller fails the job).
+// ---------------------------------------------------------------------------
+
+/** Extract the content of the LAST complete marker block from canonical text. */
+function extractLastMarkerBlock(text: string): string | null {
+  let result: string | null = null;
+  let searchFrom = 0;
+
+  while (true) {
+    const open = text.indexOf(CURSOR_MARKER_OPEN, searchFrom);
+    if (open === -1) break;
+    const contentStart = open + CURSOR_MARKER_OPEN.length;
+    const close = text.indexOf(CURSOR_MARKER_CLOSE, contentStart);
+    if (close === -1) break; // no matching close — block is incomplete
+    result = text.slice(contentStart, close);
+    searchFrom = close + CURSOR_MARKER_CLOSE.length;
+  }
+
+  return result;
+}
+
+/**
+ * Parse Cursor `stream-json` stdout into a validated review output.
+ *
+ * Pipeline: line-buffered NDJSON reduce → reconstruct canonical text → take the
+ * LAST complete marker block → JSON.parse → schema-validate. Returns null on
+ * ANY failure (missing marker, malformed JSON, schema mismatch) so the caller
+ * can fail the job.
+ */
+export function parseCursorStreamOutput(stdout: string): CursorReviewOutput | null {
+  if (!stdout || !stdout.trim()) return null;
+
+  const { canonicalText } = reduceCursorStreamEvents(stdout);
+  if (!canonicalText) return null;
+
+  const block = extractLastMarkerBlock(canonicalText);
+  if (block === null) return null;
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(block.trim());
+  } catch {
+    return null;
+  }
+
+  return validateCursorReviewOutput(parsed);
+}
+
+// ---------------------------------------------------------------------------
+// Live log formatter — maps one NDJSON event to a readable log line.
+// ---------------------------------------------------------------------------
+
+/**
+ * Format one Cursor `stream-json` line for the LiveLogViewer.
+ * Returns a human-readable string, or null if the line should be skipped.
+ *
+ * Mirrors formatClaudeLogEvent. Applies the same partial-output dedup rule as
+ * the reducer: an assistant delta is shown only when `timestamp_ms` is present
+ * and `model_call_id` is absent.
+ */
+export function formatCursorLogEvent(line: string): string | null {
+  let event: CursorStreamEvent;
+  try {
+    event = JSON.parse(line) as CursorStreamEvent;
+  } catch {
+    return null;
+  }
+
+  switch (event.type) {
+    case "system": {
+      if (event.subtype === "init") {
+        const model = typeof event.model === "string" ? event.model : undefined;
+        const sessionId = typeof event.session_id === "string" ? event.session_id : undefined;
+        const bits = ["[init]"];
+        if (model) bits.push(`model=${model}`);
+        if (sessionId) bits.push(`session=${sessionId}`);
+        return bits.join(" ");
+      }
+      return null;
+    }
+    case "assistant": {
+      // Apply partial-output dedup: only show real new deltas. If the event
+      // carries no timestamp_ms at all (non-partial flush), show it once.
+      if (event.timestamp_ms !== undefined && !isRealAssistantDelta(event)) return null;
+      const text = extractAssistantText(event);
+      return text ? text : null;
+    }
+    case "tool_call": {
+      const name = typeof event.name === "string" ? event.name : "tool";
+      if (event.subtype === "completed") {
+        return `[${name}] completed`;
+      }
+      // started (or unspecified) — show concise args, never full file contents.
+      const args =
+        typeof event.args === "string"
+          ? event.args.slice(0, 100)
+          : event.args !== undefined
+            ? JSON.stringify(event.args).slice(0, 100)
+            : "";
+      return `[${name}] ${args}`.trimEnd();
+    }
+    case "result": {
+      const duration =
+        typeof event.duration_ms === "number" ? `${event.duration_ms}ms` : undefined;
+      const requestId = typeof event.request_id === "string" ? event.request_id : undefined;
+      const bits = ["[result]"];
+      if (duration) bits.push(duration);
+      if (requestId) bits.push(`request=${requestId}`);
+      return bits.length > 1 ? bits.join(" ") : null;
+    }
+    default:
+      return null;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Finding transform — Cursor findings → external annotations.
+// Identical shape to transformClaudeFindings, with author "Cursor".
+// ---------------------------------------------------------------------------
+
+export interface CursorReviewAnnotationInput {
+  source: string;
+  filePath: string;
+  lineStart: number;
+  lineEnd: number;
+  type: string;
+  side: string;
+  scope: string;
+  text: string;
+  severity: CursorSeverity;
+  reasoning: string;
+  author: string;
+}
+
+/** Transform Cursor findings into the external annotation format. */
+export function transformCursorFindings(
+  findings: CursorFinding[],
+  source: string,
+  cwd?: string,
+  pathTransform?: (path: string) => string,
+): CursorReviewAnnotationInput[] {
+  return findings
+    .filter((f) => f.file && typeof f.line === "number")
+    .map((f) => ({
+      source,
+      filePath: pathTransform
+        ? pathTransform(toRelativePath(f.file, cwd))
+        : toRelativePath(f.file, cwd),
+      lineStart: f.line,
+      lineEnd: f.end_line ?? f.line,
+      type: "comment",
+      side: "new",
+      scope: "line",
+      text: `[${f.severity}] ${f.description}`,
+      severity: f.severity,
+      reasoning: f.reasoning,
+      author: "Cursor",
+    }));
+}
diff --git a/packages/server/review.ts b/packages/server/review.ts
index 33acff0d1..cc6fd245d 100644
--- a/packages/server/review.ts
+++ b/packages/server/review.ts
@@ -58,6 +58,12 @@ import {
   transformClaudeFindings,
 } from "./claude-review";
 import { createTourSession, TOUR_EMPTY_OUTPUT_ERROR } from "./tour/tour-review";
+import {
+  CURSOR_REVIEW_PROMPT,
+  buildCursorCommand,
+  parseCursorStreamOutput,
+  transformCursorFindings,
+} from "./cursor-review";
 import { loadConfig, saveConfig, detectGitUser, getServerConfig } from "./config";
 import { type PRMetadata, type PRReviewFileComment, type PRStackTree, type PRListItem, fetchPR, fetchPRFileContent, fetchPRContext, submitPRReview, fetchPRViewedFiles, markPRFilesViewed, fetchPRStack, fetchPRList, getPRUser, parsePRUrl, prRefFromMetadata, isSameProject, getDisplayRepo, getMRLabel, getMRNumberLabel } from "./pr";
 import { AI_QUERY_ENDPOINT, createAIRuntime } from "./ai-runtime";
@@ -538,6 +544,17 @@ export async function startReviewServer(
         return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, effort, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
       }
 
+      if (provider === "cursor") {
+        // Cursor has no schema flag — its marker-block output contract lives in
+        // CURSOR_REVIEW_PROMPT. captureStdout is required (the marker block comes
+        // back on stdout, like Claude). buildCursorCommand puts the prompt on
+        // stdin and threads --workspace=cwd to match the spawn cwd.
+        const model = typeof config?.model === "string" && config.model ? config.model : undefined;
+        const prompt = CURSOR_REVIEW_PROMPT + "\n\n---\n\n" + userMessage;
+        const { command, stdinPrompt } = buildCursorCommand(prompt, model, cwd);
+        return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
+      }
+
       return null;
     },
 
@@ -611,6 +628,43 @@ export async function startReviewServer(
         return;
       }
 
+      // --- Cursor path ---
+      // FAIL-CLOSED: Cursor output is prompt-enforced (no schema flag), so any
+      // missing/malformed/schema/transform/insertion failure must MUTATE the job
+      // to failed — NEVER throw (agent-jobs.ts swallows throws, silently leaving
+      // an exit-0 job marked done). Mirrors the Tour fail-closed pattern below.
+      if (job.provider === "cursor") {
+        const output = meta.stdout ? parseCursorStreamOutput(meta.stdout) : null;
+        if (!output) {
+          job.status = "failed";
+          job.error = "Cursor review output missing or unparseable (no valid marker JSON).";
+          return;
+        }
+
+        job.summary = {
+          correctness: output.summary.correctness,
+          explanation: output.summary.explanation,
+          confidence: output.summary.confidence,
+        };
+
+        if (output.findings.length > 0) {
+          const annotations = transformCursorFindings(
+            output.findings,
+            job.source,
+            cwd,
+            workspace ? (filePath) => workspace.normalizeAnnotationPath(filePath) : undefined,
+          )
+            .map(a => ({ ...a, ...jobPrContext, ...(jobDiffScope && { diffScope: jobDiffScope }) }));
+          const result = externalAnnotations.addAnnotations({ annotations });
+          if ("error" in result) {
+            job.status = "failed";
+            job.error = `Cursor annotation insertion failed: ${result.error}`;
+            return;
+          }
+        }
+        return;
+      }
+
       // --- Tour path ---
       if (job.provider === "tour") {
         const { summary } = await tour.onJobComplete({ job, meta });
diff --git a/packages/ui/components/AgentsTab.tsx b/packages/ui/components/AgentsTab.tsx
index 55ec9f6d4..73fa8c3e6 100644
--- a/packages/ui/components/AgentsTab.tsx
+++ b/packages/ui/components/AgentsTab.tsx
@@ -17,7 +17,7 @@ import { isTerminalStatus } from '@plannotator/shared/agent-jobs';
 import { cn } from '../lib/utils';
 import { ReviewAgentsIcon } from './ReviewAgentsIcon';
 import { useAgentSettings } from '../hooks/useAgentSettings';
-import type { AgentEngine, AgentMode } from '../hooks/useAgentSettings';
+import type { AgentEngine, AgentMode, ReviewEngine } from '../hooks/useAgentSettings';
 
 // --- Agent option catalogs (shared across review + tour engine dropdowns) ---
 
@@ -68,6 +68,12 @@ const TOUR_CLAUDE_MODELS: Array<{ value: string; label: string }> = [
   { value: 'opus', label: 'Opus (thorough)' },
 ];
 
+// Cursor v1 minimal catalog — just Auto. Model discovery waits on a
+// provider-metadata contract (see approach doc § Model Discovery).
+const CURSOR_MODELS: Array<{ value: string; label: string }> = [
+  { value: 'Auto', label: 'Auto' },
+];
+
 const MODE_LABEL: Record<AgentMode, string> = {
   review: 'Code Review',
   tour: 'Code Tour',
@@ -78,6 +84,14 @@ const ENGINE_LABEL: Record<AgentEngine, string> = {
   codex: 'Codex',
 };
 
+// Review-only label map. Keeps Tour's ENGINE_LABEL exhaustive over the
+// narrow AgentEngine union while letting the review surface offer Cursor.
+const REVIEW_ENGINE_LABEL: Record<ReviewEngine, string> = {
+  claude: 'Claude',
+  codex: 'Codex',
+  cursor: 'Cursor',
+};
+
 interface AgentsTabProps {
   jobs: AgentJobInfo[];
   capabilities: AgentCapabilities | null;
@@ -146,6 +160,7 @@ function catalogLabel(list: Array<{ value: string; label: string }>, value: stri
 }
 
 function formatModel(provider: string, engine: string | undefined, model: string): string {
+  if (provider === 'cursor') return catalogLabel(CURSOR_MODELS, model);
   if (provider === 'codex' || engine === 'codex') return catalogLabel(CODEX_MODELS, model);
   if (provider === 'tour' && engine === 'claude') return catalogLabel(TOUR_CLAUDE_MODELS, model);
   return catalogLabel(CLAUDE_MODELS, model);
@@ -376,6 +391,7 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
     codexModel,
     codexReasoning,
     codexFast,
+    cursorModel,
     tourClaudeModel,
     tourClaudeEffort,
     tourCodexModel,
@@ -389,6 +405,7 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
     setCodexModel,
     setCodexReasoning,
     setCodexFast,
+    setCursorModel,
     setTourClaudeModel,
     setTourClaudeEffort,
     setTourCodexModel,
@@ -399,7 +416,9 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
   const claudeAvailable = capabilities?.providers.some((p) => p.id === 'claude' && p.available) ?? false;
   const codexAvailable = capabilities?.providers.some((p) => p.id === 'codex' && p.available) ?? false;
   const tourAvailable = capabilities?.providers.some((p) => p.id === 'tour' && p.available) ?? false;
+  const cursorAvailable = capabilities?.providers.some((p) => p.id === 'cursor' && p.available) ?? false;
 
+  // Tour engines (narrow union). Cursor is NOT included here — it is review-only.
   const availableEngines = useMemo<AgentEngine[]>(() => {
     const engines: AgentEngine[] = [];
     if (claudeAvailable) engines.push('claude');
@@ -407,15 +426,25 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
     return engines;
   }, [claudeAvailable, codexAvailable]);
 
+  // Review engines (wide union) = tour engines + cursor when available.
+  const availableReviewEngines = useMemo<ReviewEngine[]>(() => {
+    const engines: ReviewEngine[] = [...availableEngines];
+    if (cursorAvailable) engines.push('cursor');
+    return engines;
+  }, [availableEngines, cursorAvailable]);
+
   const availableModes = useMemo<AgentMode[]>(() => {
     const modes: AgentMode[] = [];
-    if (availableEngines.length > 0) modes.push('review');
+    if (availableReviewEngines.length > 0) modes.push('review');
     if (tourAvailable && availableEngines.length > 0) modes.push('tour');
     return modes;
-  }, [availableEngines.length, tourAvailable]);
+  }, [availableReviewEngines.length, availableEngines.length, tourAvailable]);
 
   const firstAvailableEngine = availableEngines[0] ?? null;
+  const firstAvailableReviewEngine = availableReviewEngines[0] ?? null;
   const engineAvailable = (engine: AgentEngine) => engine === 'claude' ? claudeAvailable : codexAvailable;
+  const reviewEngineAvailable = (engine: ReviewEngine) =>
+    engine === 'cursor' ? cursorAvailable : engineAvailable(engine);
 
   // Reconcile mode + engine choices against live capabilities. Runs when
   // capabilities change or the stored selection becomes invalid.
@@ -424,13 +453,17 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
     if (!selectedMode || !availableModes.includes(selectedMode)) {
       setSelectedMode(availableModes[0]);
     }
-    if (!firstAvailableEngine) return;
-    if (!engineAvailable(reviewEngine)) setReviewEngine(firstAvailableEngine);
-    if (!engineAvailable(tourEngine)) setTourEngine(firstAvailableEngine);
+    if (firstAvailableReviewEngine && !reviewEngineAvailable(reviewEngine)) {
+      setReviewEngine(firstAvailableReviewEngine);
+    }
+    if (firstAvailableEngine && !engineAvailable(tourEngine)) {
+      setTourEngine(firstAvailableEngine);
+    }
   }, [
     capabilities,
     availableModes,
     firstAvailableEngine,
+    firstAvailableReviewEngine,
     selectedMode,
     reviewEngine,
     tourEngine,
@@ -466,10 +499,19 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
   );
 
   type LaunchParams = Parameters<typeof onLaunch>[0];
-  const buildReviewLaunch = (engine: AgentEngine): LaunchParams => {
+  const buildReviewLaunch = (engine: ReviewEngine): LaunchParams => {
     if (engine === 'claude') {
       return { provider: 'claude', label: 'Code Review', model: claudeModel, effort: claudeEffort };
     }
+    if (engine === 'cursor') {
+      // Omission ⇒ Auto: drop model client-side when Auto so the POST carries
+      // no model and the server applies Cursor's default behavior.
+      return {
+        provider: 'cursor',
+        label: 'Code Review',
+        ...(cursorModel && cursorModel !== 'Auto' ? { model: cursorModel } : {}),
+      };
+    }
     return {
       provider: 'codex',
       label: 'Code Review',
@@ -489,7 +531,7 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
   });
 
   const canLaunch = selectedMode === 'review'
-    ? engineAvailable(reviewEngine)
+    ? reviewEngineAvailable(reviewEngine)
     : selectedMode === 'tour'
       ? tourAvailable && engineAvailable(tourEngine)
       : false;
@@ -501,6 +543,7 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
 
   const modeOptions = availableModes.map((mode) => ({ value: mode, label: MODE_LABEL[mode] }));
   const engineOptions = availableEngines.map((engine) => ({ value: engine, label: ENGINE_LABEL[engine] }));
+  const reviewEngineOptions = availableReviewEngines.map((engine) => ({ value: engine, label: REVIEW_ENGINE_LABEL[engine] }));
   const renderStaticChoice = (label: string, icon?: React.ReactNode) => (
     <div className="flex items-center gap-2 rounded-lg border border-border/30 bg-surface-1/30 px-2.5 py-1.5">
       {icon}
@@ -508,16 +551,23 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
     </div>
   );
 
-  const renderEngineSelect = (value: AgentEngine, onChange: (engine: AgentEngine) => void) => (
+  // Engine picker, parameterized by the engine list + options so the review
+  // surface can use the wide (cursor-inclusive) variant while Tour stays narrow.
+  const renderEngineSelect = <T extends string>(
+    value: T,
+    onChange: (engine: T) => void,
+    options: Array<{ value: string; label: string }>,
+    fallbackLabel: string,
+  ) => (
     <ConfigRow label="Engine" stacked>
-      {availableEngines.length > 1 ? (
+      {options.length > 1 ? (
         <SelectMenu
           value={value}
-          options={engineOptions}
-          onChange={(next) => onChange(next as AgentEngine)}
+          options={options}
+          onChange={(next) => onChange(next as T)}
         />
       ) : (
-        renderStaticChoice(engineOptions[0]?.label ?? ENGINE_LABEL[value])
+        renderStaticChoice(options[0]?.label ?? fallbackLabel)
       )}
     </ConfigRow>
   );
@@ -549,7 +599,7 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
 
             {selectedMode === 'review' && (
               <>
-                {renderEngineSelect(reviewEngine, setReviewEngine)}
+                {renderEngineSelect(reviewEngine, setReviewEngine, reviewEngineOptions, REVIEW_ENGINE_LABEL[reviewEngine])}
                 {reviewEngine === 'claude' && (
                   <>
                     <ConfigRow label="Model" stacked>
@@ -573,12 +623,23 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
                     </ConfigRow>
                   </>
                 )}
+                {reviewEngine === 'cursor' && (
+                  <>
+                    <div className="flex items-center gap-1.5 text-[10px] text-amber-600 dark:text-amber-400">
+                      <span className="rounded bg-amber-500/10 px-1 py-px font-medium">experimental</span>
+                      <span className="text-muted-foreground/50">Findings are prompt-enforced</span>
+                    </div>
+                    <ConfigRow label="Model" stacked>
+                      <SelectMenu value={cursorModel} options={CURSOR_MODELS} onChange={setCursorModel} />
+                    </ConfigRow>
+                  </>
+                )}
               </>
             )}
 
             {selectedMode === 'tour' && (
               <>
-                {renderEngineSelect(tourEngine, setTourEngine)}
+                {renderEngineSelect(tourEngine, setTourEngine, engineOptions, ENGINE_LABEL[tourEngine])}
                 <ConfigRow label="Model" stacked>
                   <SelectMenu
                     value={tourEngine === 'claude' ? tourClaudeModel : tourCodexModel}
diff --git a/packages/ui/hooks/useAgentSettings.ts b/packages/ui/hooks/useAgentSettings.ts
index b4fca1049..37fbc00f0 100644
--- a/packages/ui/hooks/useAgentSettings.ts
+++ b/packages/ui/hooks/useAgentSettings.ts
@@ -13,6 +13,7 @@ export const DEFAULT_TOUR_CLAUDE_EFFORT = 'medium';
 export const DEFAULT_TOUR_CODEX_MODEL = 'gpt-5.3-codex';
 export const DEFAULT_TOUR_CODEX_REASONING = 'medium';
 export const DEFAULT_TOUR_CODEX_FAST = false;
+export const DEFAULT_CURSOR_MODEL = 'Auto';
 
 interface ClaudeSection {
   model: string;
@@ -24,15 +25,25 @@ interface CodexSection {
   perModel: Record<string, { reasoning: string; fast: boolean }>;
 }
 
+// Cursor has no per-model sub-settings (no effort/reasoning), so a flat
+// { model } section is enough — deliberately simpler than Claude/Codex.
+interface CursorSection {
+  model: string; // 'Auto' or a discovered/custom model string
+}
+
 export type AgentMode = 'review' | 'tour';
 export type AgentEngine = 'claude' | 'codex';
+// Review-only engine union. Tour stays on the narrow AgentEngine so its
+// exhaustive Record<AgentEngine, ...> maps remain valid without change.
+export type ReviewEngine = AgentEngine | 'cursor';
 
 interface AgentSettingsState {
   selectedMode?: AgentMode;
-  reviewEngine: AgentEngine;
+  reviewEngine: ReviewEngine;
   tourEngine: AgentEngine;
   claude: ClaudeSection;
   codex: CodexSection;
+  cursor: CursorSection;
   tourClaude: ClaudeSection;
   tourCodex: CodexSection;
 }
@@ -43,6 +54,7 @@ const initialState: AgentSettingsState = {
   tourEngine: 'claude',
   claude: { model: DEFAULT_CLAUDE_MODEL, perModel: {} },
   codex: { model: DEFAULT_CODEX_MODEL, perModel: {} },
+  cursor: { model: DEFAULT_CURSOR_MODEL },
   tourClaude: { model: DEFAULT_TOUR_CLAUDE_MODEL, perModel: {} },
   tourCodex: { model: DEFAULT_TOUR_CODEX_MODEL, perModel: {} },
 };
@@ -70,6 +82,11 @@ function parseEngine(value: unknown): AgentEngine {
   return value === 'codex' ? 'codex' : 'claude';
 }
 
+function parseReviewEngine(value: unknown): ReviewEngine {
+  if (value === 'cursor') return 'cursor';
+  return parseEngine(value);
+}
+
 function parseMode(value: unknown): AgentMode | undefined {
   if (value === 'review' || value === 'tour') return value;
   return undefined;
@@ -82,7 +99,7 @@ function readCookie(): AgentSettingsState {
     const parsed = JSON.parse(raw);
     return {
       selectedMode: parseMode(parsed.selectedMode) ?? initialState.selectedMode,
-      reviewEngine: parseEngine(parsed.reviewEngine),
+      reviewEngine: parseReviewEngine(parsed.reviewEngine),
       tourEngine: parseEngine(parsed.tourEngine),
       claude: {
         model: typeof parsed.claude?.model === 'string' ? parsed.claude.model : DEFAULT_CLAUDE_MODEL,
@@ -92,6 +109,9 @@ function readCookie(): AgentSettingsState {
         model: typeof parsed.codex?.model === 'string' ? parsed.codex.model : DEFAULT_CODEX_MODEL,
         perModel: sanitizeCodexPerModel(parsed.codex?.perModel),
       },
+      cursor: {
+        model: typeof parsed.cursor?.model === 'string' ? parsed.cursor.model : DEFAULT_CURSOR_MODEL,
+      },
       tourClaude: {
         model: typeof parsed.tourClaude?.model === 'string' ? parsed.tourClaude.model : DEFAULT_TOUR_CLAUDE_MODEL,
         perModel: parsed.tourClaude?.perModel ?? {},
@@ -117,7 +137,7 @@ export function useAgentSettings() {
     setState((s) => ({ ...s, selectedMode: mode }));
   }, []);
 
-  const setReviewEngine = useCallback((engine: AgentEngine) => {
+  const setReviewEngine = useCallback((engine: ReviewEngine) => {
     setState((s) => ({ ...s, reviewEngine: engine }));
   }, []);
 
@@ -155,6 +175,10 @@ export function useAgentSettings() {
     setState((s) => ({ ...s, codex: { ...s.codex, model } }));
   }, []);
 
+  const setCursorModel = useCallback((model: string) => {
+    setState((s) => ({ ...s, cursor: { ...s.cursor, model } }));
+  }, []);
+
   const patchCodex = useCallback(
     (
       section: 'codex' | 'tourCodex',
@@ -223,6 +247,7 @@ export function useAgentSettings() {
     codexModel: state.codex.model,
     codexReasoning,
     codexFast,
+    cursorModel: state.cursor.model,
     tourClaudeModel: state.tourClaude.model,
     tourClaudeEffort,
     tourCodexModel: state.tourCodex.model,
@@ -236,6 +261,7 @@ export function useAgentSettings() {
     setCodexModel,
     setCodexReasoning,
     setCodexFast,
+    setCursorModel,
     setTourClaudeModel,
     setTourClaudeEffort,
     setTourCodexModel,

From 98e18b9098bc8d567552eea0406bf687737c138e Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Sat, 13 Jun 2026 14:17:54 -0700
Subject: [PATCH 2/7] fix(review): address Cursor provider review feedback (PR
 #912)

- [P1] Pass the review prompt as the trailing positional argv arg instead of
  stdin. 'agent' reads task text from argv ([prompt...]); on stdin it received
  an empty prompt and never saw the diff/instructions, so no real review ran.
- [P2] Add --trust to the headless command. Without it 'agent -p' stops on an
  interactive workspace-trust prompt a background job can't answer, failing the
  first review in any untrusted repo. Safe alongside --mode ask + --sandbox.
- [P2] Derive the job verdict from finding severities (like Claude) instead of
  storing Cursor's free-form summary.correctness. A value like 'not correct'
  was accepted verbatim and the detail panel (string contains 'correct' except
  'incorrect' -> green) would invert the displayed result.
- [P3] Carry a partial-line buffer across stdout chunks in the live-log drainer.
  stream-json is NDJSON with arbitrary chunk boundaries; records split across
  chunks were dropped from live logs (fixes Claude's same latent bug too).

All changes mirrored into the Pi runtime (serverReview.ts, agent-jobs.ts).
Typecheck green; 58 tests pass across cursor-review + affected families.
---
 apps/pi-extension/server/agent-jobs.ts   | 63 +++++++++++++-----------
 apps/pi-extension/server/serverReview.ts | 16 ++++--
 packages/server/agent-jobs.ts            | 61 ++++++++++++-----------
 packages/server/cursor-review.test.ts    | 11 +++--
 packages/server/cursor-review.ts         | 19 ++++---
 packages/server/review.ts                | 16 ++++--
 6 files changed, 106 insertions(+), 80 deletions(-)

diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts
index 696c56212..34648d767 100644
--- a/apps/pi-extension/server/agent-jobs.ts
+++ b/apps/pi-extension/server/agent-jobs.ts
@@ -195,40 +195,45 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 			if (spawnOptions?.cwd) jobOutputPaths.set(`${id}:cwd`, spawnOptions.cwd);
 			broadcast({ type: "job:started", job: { ...info } });
 
-			// --- Stdout capture (Claude JSONL streaming) ---
+			// --- Stdout capture (Claude/Cursor stream-json) ---
 			let stdoutBuf = "";
 			if (captureStdout && proc.stdout) {
+				// Format one complete JSONL line into a live-log delta (skip result
+				// events — handled in onJobComplete).
+				const emitLogLine = (line: string) => {
+					if (!line.trim()) return;
+					// Tour jobs with the Claude engine also stream Claude JSONL.
+					if (provider === "claude" || spawnOptions?.engine === "claude") {
+						const formatted = formatClaudeLogEvent(line);
+						if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
+						return;
+					}
+					// Cursor: map stream-json events (init/assistant/tool_call/result)
+					// into readable log deltas, applying the partial-output dedup rule.
+					if (provider === "cursor") {
+						const formatted = formatCursorLogEvent(line);
+						if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
+						return;
+					}
+					try {
+						const event = JSON.parse(line);
+						if (event.type === 'result') return;
+					} catch { /* not JSON — forward as raw log */ }
+					broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
+				};
+				// stream-json output is NDJSON and chunk boundaries are arbitrary —
+				// carry the trailing partial line until a later chunk completes it,
+				// otherwise records split across chunks are dropped from live logs.
+				let logLineCarry = "";
 				proc.stdout.on("data", (chunk: Buffer) => {
 					const text = chunk.toString();
 					stdoutBuf += text;
-
-					// Forward JSONL lines as log events
-					const lines = text.split('\n');
-					for (const line of lines) {
-						if (!line.trim()) continue;
-						// Tour jobs with the Claude engine also stream Claude JSONL.
-						if (provider === "claude" || spawnOptions?.engine === "claude") {
-							const formatted = formatClaudeLogEvent(line);
-							if (formatted !== null) {
-								broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
-							}
-							continue;
-						}
-						// Cursor: map stream-json events (init/assistant/tool_call/result)
-						// into readable log deltas, applying the partial-output dedup rule.
-						if (provider === "cursor") {
-							const formatted = formatCursorLogEvent(line);
-							if (formatted !== null) {
-								broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
-							}
-							continue;
-						}
-						try {
-							const event = JSON.parse(line);
-							if (event.type === 'result') continue;
-						} catch { /* not JSON — forward as raw log */ }
-						broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
-					}
+					const lines = (logLineCarry + text).split('\n');
+					logLineCarry = lines.pop() ?? "";
+					for (const line of lines) emitLogLine(line);
+				});
+				proc.stdout.on("end", () => {
+					if (logLineCarry) emitLogLine(logLineCarry);
 				});
 			}
 
diff --git a/apps/pi-extension/server/serverReview.ts b/apps/pi-extension/server/serverReview.ts
index 20a8d7282..8743c252a 100644
--- a/apps/pi-extension/server/serverReview.ts
+++ b/apps/pi-extension/server/serverReview.ts
@@ -576,12 +576,12 @@ export async function startReviewServer(options: {
 			if (provider === "cursor") {
 				// Cursor has no schema flag — its marker-block output contract lives in
 				// CURSOR_REVIEW_PROMPT. captureStdout is required (the marker block comes
-				// back on stdout, like Claude). buildCursorCommand puts the prompt on
-				// stdin and threads --workspace=cwd to match the spawn cwd.
+				// back on stdout, like Claude). buildCursorCommand passes the prompt as
+				// the trailing argv arg and threads --workspace=cwd to match the spawn cwd.
 				const model = typeof config?.model === "string" && config.model ? config.model : undefined;
 				const prompt = CURSOR_REVIEW_PROMPT + "\n\n---\n\n" + userMessage;
-				const { command, stdinPrompt } = buildCursorCommand(prompt, model, cwd);
-				return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
+				const { command } = buildCursorCommand(prompt, model, cwd);
+				return { command, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
 			}
 
 			return null;
@@ -666,8 +666,14 @@ export async function startReviewServer(options: {
 					return;
 				}
 
+				// Derive the verdict from finding severities (like Claude) rather than
+				// trusting Cursor's free-form `correctness` string. Cursor has no schema
+				// flag, so a model value like "not correct" would be stored verbatim and
+				// the detail panel (any string containing "correct" except "incorrect" →
+				// green) would invert the displayed result.
+				const hasImportant = output.findings.some((f) => f.severity === "important");
 				job.summary = {
-					correctness: output.summary.correctness,
+					correctness: hasImportant ? "Issues Found" : "Correct",
 					explanation: output.summary.explanation,
 					confidence: output.summary.confidence,
 				};
diff --git a/packages/server/agent-jobs.ts b/packages/server/agent-jobs.ts
index c6ee893bf..c53c2f090 100644
--- a/packages/server/agent-jobs.ts
+++ b/packages/server/agent-jobs.ts
@@ -247,41 +247,44 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
       let stdoutBuf = "";
       const stdoutDone = (captureStdout && proc.stdout && typeof proc.stdout !== "number")
         ? (async () => {
+            // Format one complete JSONL line into a live-log delta (skip result
+            // events — those are handled in onJobComplete).
+            const emitLogLine = (line: string) => {
+              if (!line.trim()) return;
+              // Claude: format JSONL into readable text. Tour jobs with the
+              // Claude engine also stream Claude JSONL, so key off engine too.
+              if (provider === "claude" || spawnOptions?.engine === "claude") {
+                const formatted = formatClaudeLogEvent(line);
+                if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
+                return;
+              }
+              // Cursor: map stream-json events (init/assistant/tool_call/result)
+              // into readable log deltas, applying the partial-output dedup rule.
+              if (provider === "cursor") {
+                const formatted = formatCursorLogEvent(line);
+                if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
+                return;
+              }
+              try {
+                const event = JSON.parse(line);
+                if (event.type === 'result') return;
+              } catch { /* not JSON — forward as raw log */ }
+              broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
+            };
             try {
               const reader = proc!.stdout as unknown as AsyncIterable<Uint8Array>;
+              // stream-json output is NDJSON and chunk boundaries are arbitrary —
+              // carry the trailing partial line until a later chunk completes it,
+              // otherwise records split across chunks are dropped from live logs.
+              let logLineCarry = "";
               for await (const chunk of reader) {
                 const text = typeof chunk === "string" ? chunk : new TextDecoder().decode(chunk);
                 stdoutBuf += text;
-
-                // Forward JSONL lines as log events (skip result events)
-                const lines = text.split('\n');
-                for (const line of lines) {
-                  if (!line.trim()) continue;
-                  // Claude: format JSONL into readable text. Tour jobs with the
-                  // Claude engine also stream Claude JSONL, so key off engine too.
-                  if (provider === "claude" || spawnOptions?.engine === "claude") {
-                    const formatted = formatClaudeLogEvent(line);
-                    if (formatted !== null) {
-                      broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
-                    }
-                    continue;
-                  }
-                  // Cursor: map stream-json events (init/assistant/tool_call/result)
-                  // into readable log deltas, applying the partial-output dedup rule.
-                  if (provider === "cursor") {
-                    const formatted = formatCursorLogEvent(line);
-                    if (formatted !== null) {
-                      broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
-                    }
-                    continue;
-                  }
-                  try {
-                    const event = JSON.parse(line);
-                    if (event.type === 'result') continue; // handled in onJobComplete
-                  } catch { /* not JSON — forward as raw log */ }
-                  broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
-                }
+                const lines = (logLineCarry + text).split('\n');
+                logLineCarry = lines.pop() ?? "";
+                for (const line of lines) emitLogLine(line);
               }
+              if (logLineCarry) emitLogLine(logLineCarry);
             } catch {
               // Stream closed
             }
diff --git a/packages/server/cursor-review.test.ts b/packages/server/cursor-review.test.ts
index bd724037d..04ba82c48 100644
--- a/packages/server/cursor-review.test.ts
+++ b/packages/server/cursor-review.test.ts
@@ -63,8 +63,8 @@ function chunkify(s: string, parts: number): string[] {
 // ---------------------------------------------------------------------------
 
 describe("buildCursorCommand", () => {
-  test("uses the `agent` binary and read-only flags, prompt on stdin", () => {
-    const { command, stdinPrompt } = buildCursorCommand("review this", "gpt-5", "/repo");
+  test("uses the `agent` binary and read-only flags, prompt as trailing argv", () => {
+    const { command } = buildCursorCommand("review this", "gpt-5", "/repo");
     expect(command[0]).toBe("agent");
     expect(command).toContain("-p");
     // Read-only posture: --mode ask + --sandbox enabled, NO --force/--yolo.
@@ -74,6 +74,8 @@ describe("buildCursorCommand", () => {
     expect(command[command.indexOf("--sandbox") + 1]).toBe("enabled");
     expect(command).not.toContain("--force");
     expect(command).not.toContain("--yolo");
+    // Headless print mode needs --trust to skip the interactive workspace-trust prompt.
+    expect(command).toContain("--trust");
     // stream-json + partial output for live logs.
     expect(command).toContain("--output-format");
     expect(command[command.indexOf("--output-format") + 1]).toBe("stream-json");
@@ -81,9 +83,8 @@ describe("buildCursorCommand", () => {
     // workspace matches launch cwd; model present.
     expect(command[command.indexOf("--workspace") + 1]).toBe("/repo");
     expect(command[command.indexOf("--model") + 1]).toBe("gpt-5");
-    // Prompt is NOT in argv — it goes on stdin.
-    expect(stdinPrompt).toBe("review this");
-    expect(command).not.toContain("review this");
+    // Prompt is the trailing positional argv arg — agent reads it from argv, not stdin.
+    expect(command[command.length - 1]).toBe("review this");
   });
 
   test("omits --model when model is Auto or empty", () => {
diff --git a/packages/server/cursor-review.ts b/packages/server/cursor-review.ts
index fe6f4526f..2347f8221 100644
--- a/packages/server/cursor-review.ts
+++ b/packages/server/cursor-review.ts
@@ -223,18 +223,21 @@ If no issues are found, return an empty "findings" array with a valid summary.`;
 
 export interface CursorCommandResult {
   command: string[];
-  /** Prompt text written to stdin (preferred over argv, like Claude). */
-  stdinPrompt: string;
 }
 
 /**
  * Build the `agent -p` command. NOTE the binary is `agent`, NOT `cursor`.
  *
  * Read-only posture comes entirely from `--mode ask` + `--sandbox enabled` and
- * the absence of `--force`/`--yolo`. The prompt goes on stdin (preferred over
- * argv — avoids quoting issues and argv size limits). `--model` is omitted when
- * the model is `Auto`/empty so Cursor uses its default model selection.
- * `--workspace` is set to the launch cwd when provided, matching the spawn cwd.
+ * the absence of `--force`/`--yolo`. `--trust` is required in headless print
+ * mode: without it Cursor stops on an interactive workspace-trust prompt that a
+ * background job can never answer. It is safe here because the run is already
+ * constrained to read-only ask mode with the sandbox enabled.
+ *
+ * The prompt is the trailing positional argument — `agent` reads task text from
+ * argv (`[prompt...]`), not stdin. `--model` is omitted when the model is
+ * `Auto`/empty so Cursor uses its default model selection. `--workspace` is set
+ * to the launch cwd when provided, matching the spawn cwd.
  */
 export function buildCursorCommand(prompt: string, model?: string, cwd?: string): CursorCommandResult {
   const useModel = !!model && model !== "Auto";
@@ -248,12 +251,14 @@ export function buildCursorCommand(prompt: string, model?: string, cwd?: string)
       "--output-format",
       "stream-json",
       "--stream-partial-output",
+      "--trust",
       ...(cwd ? ["--workspace", cwd] : []),
       "--sandbox",
       "enabled",
       ...(useModel ? ["--model", model] : []),
+      // Prompt is the trailing positional arg — agent reads it from argv, not stdin.
+      prompt,
     ],
-    stdinPrompt: prompt,
   };
 }
 
diff --git a/packages/server/review.ts b/packages/server/review.ts
index cc6fd245d..515e0917c 100644
--- a/packages/server/review.ts
+++ b/packages/server/review.ts
@@ -547,12 +547,12 @@ export async function startReviewServer(
       if (provider === "cursor") {
         // Cursor has no schema flag — its marker-block output contract lives in
         // CURSOR_REVIEW_PROMPT. captureStdout is required (the marker block comes
-        // back on stdout, like Claude). buildCursorCommand puts the prompt on
-        // stdin and threads --workspace=cwd to match the spawn cwd.
+        // back on stdout, like Claude). buildCursorCommand passes the prompt as
+        // the trailing argv arg and threads --workspace=cwd to match the spawn cwd.
         const model = typeof config?.model === "string" && config.model ? config.model : undefined;
         const prompt = CURSOR_REVIEW_PROMPT + "\n\n---\n\n" + userMessage;
-        const { command, stdinPrompt } = buildCursorCommand(prompt, model, cwd);
-        return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
+        const { command } = buildCursorCommand(prompt, model, cwd);
+        return { command, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext };
       }
 
       return null;
@@ -641,8 +641,14 @@ export async function startReviewServer(
           return;
         }
 
+        // Derive the verdict from finding severities (like Claude) rather than
+        // trusting Cursor's free-form `correctness` string. Cursor has no schema
+        // flag, so a model value like "not correct" would be stored verbatim and
+        // the detail panel (any string containing "correct" except "incorrect" →
+        // green) would invert the displayed result.
+        const hasImportant = output.findings.some((f) => f.severity === "important");
         job.summary = {
-          correctness: output.summary.correctness,
+          correctness: hasImportant ? "Issues Found" : "Correct",
           explanation: output.summary.explanation,
           confidence: output.summary.confidence,
         };

From ac41eac49796c1226c9f598352477b90e0da65e5 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Sat, 13 Jun 2026 19:24:41 -0700
Subject: [PATCH 3/7] fix(review): address second-pass Cursor review feedback
 (PR #912)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- [P2] Skip Cursor's final buffered assistant flush in live logs. We always run
  with --stream-partial-output, so only real deltas (timestamp_ms present,
  model_call_id absent) should show; the no-timestamp end-of-turn flush repeats
  the whole assistant output. Added a test.
- [important] Verify the 'agent' binary is actually Cursor before offering it.
  'agent' is a generic name; capability detection now probes 'agent about
  --format json' (fast, unauthenticated) and requires a cliVersion field, so an
  unrelated 'agent' on PATH is never spawned with review content.
- [nit] Cursor is fail-closed: an unexpected throw in onJobComplete now flips the
  job to failed instead of being swallowed (Claude/Codex stay fail-open).
- [P3] Cursor jobs now label as 'Cursor' (was 'Shell') in the detail panel.
- [nit] Cursor's single-option model row renders static text, not a dead dropdown.
- [nit] Pi buildCommand JSDoc now lists cursor.

All server changes mirrored Bun<->Pi. Typecheck green; 59 tests pass.

Note: the argv-size finding was triaged and intentionally NOT changed — Cursor
passes the prompt as argv exactly like Codex already does (parity, not a
regression). The diff is only embedded for full-stack/workspace/unknown-VCS
reviews; normal reviews hand the agent a git instruction or URL.
---
 apps/pi-extension/server/agent-jobs.ts        | 40 ++++++++++++++---
 .../dock/panels/ReviewAgentJobDetailPanel.tsx |  2 +-
 packages/server/agent-jobs.ts                 | 43 +++++++++++++++++--
 packages/server/cursor-review.test.ts         |  8 ++++
 packages/server/cursor-review.ts              |  9 ++--
 packages/ui/components/AgentsTab.tsx          |  6 ++-
 6 files changed, 94 insertions(+), 14 deletions(-)

diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts
index 34648d767..e3742fba4 100644
--- a/apps/pi-extension/server/agent-jobs.ts
+++ b/apps/pi-extension/server/agent-jobs.ts
@@ -64,7 +64,7 @@ export interface AgentJobHandlerOptions {
 	mode: "plan" | "review" | "annotate";
 	getServerUrl: () => string;
 	getCwd: () => string;
-	/** Server-side command builder for known providers (codex, claude, tour). */
+	/** Server-side command builder for known providers (codex, claude, tour, cursor). */
 	buildCommand?: (provider: string, config?: Record<string, unknown>) => Promise<{
 		command: string[];
 		outputPath?: string;
@@ -94,6 +94,28 @@ export interface AgentJobHandlerOptions {
 	onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
 }
 
+/**
+ * `agent` is a very generic binary name — other tools ship binaries called
+ * `agent` too, so finding one on PATH does not prove it is Cursor. Verify its
+ * identity before offering Cursor as a provider. Cheap one-time probe: `agent
+ * about --format json` is fast, works unauthenticated, and only the Cursor CLI
+ * emits a JSON object with a `cliVersion` field. Any failure → not Cursor.
+ */
+function isCursorAgentAvailable(): boolean {
+	if (!whichCmd("agent")) return false;
+	try {
+		const out = execFileSync("agent", ["about", "--format", "json"], {
+			timeout: 3000,
+			stdio: ["ignore", "pipe", "ignore"],
+			encoding: "utf8",
+		});
+		const parsed = JSON.parse(out) as { cliVersion?: unknown };
+		return parsed != null && typeof parsed.cliVersion === "string";
+	} catch {
+		return false;
+	}
+}
+
 export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 	const { mode, getServerUrl, getCwd } = options;
 
@@ -108,8 +130,9 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 		{ id: "claude", name: "Claude Code", available: whichCmd("claude") },
 		{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
 		{ id: "tour", name: "Code Tour", available: whichCmd("claude") || whichCmd("codex") },
-		// Cursor CLI's binary is literally named `agent` (NOT `cursor`).
-		{ id: "cursor", name: "Cursor CLI", available: whichCmd("agent") },
+		// Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
+		// identity rather than trusting the name alone (see isCursorAgentAvailable).
+		{ id: "cursor", name: "Cursor CLI", available: isCursorAgentAvailable() },
 	];
 	const capabilitiesResponse: AgentCapabilities = {
 		mode,
@@ -291,8 +314,15 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 							stdout: captureStdout ? stdoutBuf : undefined,
 							cwd: jobCwd,
 						});
-					} catch {
-						// Result ingestion failure shouldn't prevent job completion broadcast
+					} catch (err) {
+						// Claude/Codex are fail-open; Cursor is fail-closed — an unexpected
+						// throw during prompt-enforced ingestion must fail the job, not pass
+						// it. (The Cursor handler normally fails by mutation and never throws;
+						// this guards future refactors.)
+						if (provider === "cursor") {
+							entry.info.status = "failed";
+							entry.info.error = err instanceof Error ? err.message : "Cursor result ingestion failed";
+						}
 					}
 				}
 				jobOutputPaths.delete(id);
diff --git a/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx b/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx
index d1c2b2ff5..6afa5a5d1 100644
--- a/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx
+++ b/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx
@@ -350,7 +350,7 @@ function ProviderPill({ provider, engine, model }: { provider: string; engine?:
     const engineLabel = engine === 'codex' ? 'Codex' : 'Claude';
     label = model && engine !== 'codex' ? `Tour · ${engineLabel} ${model.charAt(0).toUpperCase() + model.slice(1)}` : `Tour · ${engineLabel}`;
   } else {
-    label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : 'Shell';
+    label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : provider === 'cursor' ? 'Cursor' : 'Shell';
   }
   return (
     <span className={`text-[10px] font-semibold uppercase tracking-wider px-1.5 py-0.5 rounded ${
diff --git a/packages/server/agent-jobs.ts b/packages/server/agent-jobs.ts
index c53c2f090..9750c7432 100644
--- a/packages/server/agent-jobs.ts
+++ b/packages/server/agent-jobs.ts
@@ -105,6 +105,32 @@ export interface AgentJobHandlerOptions {
   onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
 }
 
+/**
+ * `agent` is a very generic binary name — other tools ship binaries called
+ * `agent` too, so finding one on PATH does not prove it is Cursor. Confirm its
+ * identity before offering Cursor as a provider; otherwise selecting Cursor
+ * could spawn an unrelated tool with review content as an argument.
+ *
+ * Cheap one-time probe: `agent about --format json` is fast, works
+ * unauthenticated, and only the Cursor CLI emits a JSON object with a
+ * `cliVersion` field. Any failure/timeout/mismatch → treat as not Cursor.
+ */
+function isCursorAgentAvailable(): boolean {
+  if (!Bun.which("agent")) return false;
+  try {
+    const res = Bun.spawnSync(["agent", "about", "--format", "json"], {
+      stdout: "pipe",
+      stderr: "ignore",
+      timeout: 3000,
+    });
+    if (!res.success) return false;
+    const parsed = JSON.parse(new TextDecoder().decode(res.stdout)) as { cliVersion?: unknown };
+    return parsed != null && typeof parsed.cliVersion === "string";
+  } catch {
+    return false;
+  }
+}
+
 export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJobHandler {
   const { mode, getServerUrl, getCwd } = options;
 
@@ -120,8 +146,9 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
     { id: "claude", name: "Claude Code", available: !!Bun.which("claude") },
     { id: "codex", name: "Codex CLI", available: !!Bun.which("codex") },
     { id: "tour", name: "Code Tour", available: !!Bun.which("claude") || !!Bun.which("codex") },
-    // Cursor CLI's binary is literally named `agent` (NOT `cursor`).
-    { id: "cursor", name: "Cursor CLI", available: !!Bun.which("agent") },
+    // Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
+    // identity rather than trusting the name alone (see isCursorAgentAvailable).
+    { id: "cursor", name: "Cursor CLI", available: isCursorAgentAvailable() },
   ];
   const capabilitiesResponse: AgentCapabilities = {
     mode,
@@ -317,8 +344,16 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
               stdout: captureStdout ? stdoutBuf : undefined,
               cwd: jobCwd,
             });
-          } catch {
-            // Result ingestion failure shouldn't prevent job completion broadcast
+          } catch (err) {
+            // Claude/Codex are fail-open: an ingestion error is logged but does
+            // not change the terminal state. Cursor is fail-closed — its findings
+            // are prompt-enforced, so an unexpected throw here must surface as a
+            // failed job rather than a green one. (The Cursor handler normally
+            // fails by mutation and never throws; this guards future refactors.)
+            if (provider === "cursor") {
+              entry.info.status = "failed";
+              entry.info.error = err instanceof Error ? err.message : "Cursor result ingestion failed";
+            }
           }
         }
         jobOutputPaths.delete(id);
diff --git a/packages/server/cursor-review.test.ts b/packages/server/cursor-review.test.ts
index 04ba82c48..1cf501f2b 100644
--- a/packages/server/cursor-review.test.ts
+++ b/packages/server/cursor-review.test.ts
@@ -255,6 +255,14 @@ describe("formatCursorLogEvent", () => {
     expect(formatCursorLogEvent("not json")).toBeNull();
     expect(formatCursorLogEvent(ndjson({ type: "user" }).trim())).toBeNull();
   });
+
+  test("skips the final buffered assistant flush (no timestamp_ms) under partial output", () => {
+    // End-of-turn flush carries no timestamp_ms and repeats already-streamed
+    // deltas — showing it would duplicate the whole assistant output in live logs.
+    expect(
+      formatCursorLogEvent(ndjson({ type: "assistant", message: { content: "FULL FLUSH" } }).trim()),
+    ).toBeNull();
+  });
 });
 
 // ---------------------------------------------------------------------------
diff --git a/packages/server/cursor-review.ts b/packages/server/cursor-review.ts
index 2347f8221..9902d42c1 100644
--- a/packages/server/cursor-review.ts
+++ b/packages/server/cursor-review.ts
@@ -440,9 +440,12 @@ export function formatCursorLogEvent(line: string): string | null {
       return null;
     }
     case "assistant": {
-      // Apply partial-output dedup: only show real new deltas. If the event
-      // carries no timestamp_ms at all (non-partial flush), show it once.
-      if (event.timestamp_ms !== undefined && !isRealAssistantDelta(event)) return null;
+      // We always launch with --stream-partial-output, so the only assistant
+      // events worth showing live are real new deltas (timestamp_ms present,
+      // model_call_id absent). Everything else repeats already-streamed text:
+      // pre-tool-call duplicate flushes (model_call_id present) and the final
+      // buffered flush at end of turn (no timestamp_ms). Skip both.
+      if (!isRealAssistantDelta(event)) return null;
       const text = extractAssistantText(event);
       return text ? text : null;
     }
diff --git a/packages/ui/components/AgentsTab.tsx b/packages/ui/components/AgentsTab.tsx
index 73fa8c3e6..aa1b0e146 100644
--- a/packages/ui/components/AgentsTab.tsx
+++ b/packages/ui/components/AgentsTab.tsx
@@ -630,7 +630,11 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
                       <span className="text-muted-foreground/50">Findings are prompt-enforced</span>
                     </div>
                     <ConfigRow label="Model" stacked>
-                      <SelectMenu value={cursorModel} options={CURSOR_MODELS} onChange={setCursorModel} />
+                      {CURSOR_MODELS.length > 1 ? (
+                        <SelectMenu value={cursorModel} options={CURSOR_MODELS} onChange={setCursorModel} />
+                      ) : (
+                        renderStaticChoice(catalogLabel(CURSOR_MODELS, cursorModel))
+                      )}
                     </ConfigRow>
                   </>
                 )}

From a7f53fdd9e1825f6f8bd2e8eb5acd6d89389875b Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Sat, 13 Jun 2026 19:29:00 -0700
Subject: [PATCH 4/7] docs(review): clarify why the stream reducer keeps the
 assistant flush the live-log formatter drops

Self-review follow-up: the assistant-dedup rule differs on purpose between
reduceCursorStreamEvents (keeps the no-timestamp end-of-turn flush as a
marker-parse safety net) and formatCursorLogEvent (drops it to avoid
duplicating output in live logs). Documents the relationship so the two aren't
mistakenly unified. Comment-only.
---
 packages/server/cursor-review.ts | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/packages/server/cursor-review.ts b/packages/server/cursor-review.ts
index 9902d42c1..2b50b3cea 100644
--- a/packages/server/cursor-review.ts
+++ b/packages/server/cursor-review.ts
@@ -335,9 +335,15 @@ export function reduceCursorStreamEvents(stdout: string): CursorStreamReduction
     recordCount++;
 
     if (event.type === "assistant") {
-      // For partial streaming, only append real new deltas. If no event in the
-      // stream carries timestamp_ms (partial streaming disabled), append the
-      // full assistant text once.
+      // Append real new deltas (timestamp_ms present, model_call_id absent).
+      // The no-timestamp branch below intentionally KEEPS the end-of-turn flush
+      // — it covers both partial-streaming-disabled output (the full message
+      // arrives once) and the enabled-mode final flush, and keeping it is a
+      // parse-robustness safety net: it guarantees the marker block is present
+      // even if the deltas didn't fully carry it (extractLastMarkerBlock takes
+      // the LAST block, so the duplicate is harmless). This is deliberately MORE
+      // lenient than formatCursorLogEvent, which drops the flush so live logs
+      // don't repeat the whole assistant output. Do not "unify" the two.
       if (event.timestamp_ms !== undefined) {
         if (isRealAssistantDelta(event)) canonicalText += extractAssistantText(event);
       } else if (event.model_call_id === undefined) {

From 39f10d85cb967c435891d6ab1c03d5893b497fd6 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Sat, 13 Jun 2026 20:01:04 -0700
Subject: [PATCH 5/7] feat(review): upgrade the Cursor review prompt
 (investigation-first, repo-guidance-aware)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the homemade Claude-derived prompt with an investigation-first prompt
tuned for Cursor's agentic model:
- Tells the agent to use tools — read the full module, trace call sites, check
  sibling code/tests, confirm the issue isn't already handled — before reporting.
- Discovers and honors repo guidance if present (CLAUDE.md, REVIEW.md, AGENTS.md,
  .cursor/rules/*, .cursor/BUGBOT.md root+nested). So project-specific rules flow
  in from the repo instead of being hardcoded — same prompt works for any repo,
  and a repo's BUGBOT.md serves both Cursor Bugbot and this CLI review.
- Adds an explicit 'Do NOT report' list (formatting/naming, speculative
  refactors, 'consider adding tests', exotic hypotheticals) to cut noise.
- Requires impact + trigger in each finding; one finding per distinct bug.
- Optimizes for findings a maintainer would actually fix.

Output contract unchanged: same <plannotator-review-json> marker block and the
same important/nit/pre_existing severities the parser, transform, and UI depend
on. No test asserts prompt text; typecheck + 23 tests pass.
---
 packages/server/cursor-review.ts | 127 +++++++++++++++++--------------
 1 file changed, 70 insertions(+), 57 deletions(-)

diff --git a/packages/server/cursor-review.ts b/packages/server/cursor-review.ts
index 2b50b3cea..9653939c6 100644
--- a/packages/server/cursor-review.ts
+++ b/packages/server/cursor-review.ts
@@ -115,67 +115,77 @@ export function validateCursorReviewOutput(parsed: unknown): CursorReviewOutput
 }
 
 // ---------------------------------------------------------------------------
-// Review prompt — mirrors CLAUDE_REVIEW_PROMPT, ending with the marker-block
-// output contract (Cursor has no schema flag, so the contract is the prompt).
+// Review prompt — investigation-first methodology tuned for Cursor's agentic
+// model, ending with the marker-block output contract (Cursor has no schema
+// flag, so the contract IS the prompt). Project-specific rules are NOT baked in
+// here: the agent is told to discover and honor repo guidance files at review
+// time, so this same prompt works for any repo `plannotator review` is run on.
 // ---------------------------------------------------------------------------
 
-export const CURSOR_REVIEW_PROMPT = `# Cursor Code Review System Prompt
-
-## Identity
-You are a code review system. Your job is to find bugs that would break
-production. You are not a linter, formatter, or style checker unless
-project guidance files explicitly expand your scope.
-
-## Pipeline
-
-Step 1: Gather context
-  - Retrieve the PR diff or local diff (gh pr diff, git diff, or jj diff)
-  - Read CLAUDE.md and REVIEW.md at the repo root and in every directory
-    containing modified files
-  - Build a map of which rules apply to which file paths
-  - Identify any skip rules (paths, patterns, or file types to ignore)
-
-Step 2: Review for issues
-  - Logic errors, regressions, broken edge cases, build failures, and code
-    that will produce wrong results.
-  - Security vulnerabilities with concrete exploit paths, race conditions, and
-    incorrect assumptions about trust boundaries.
-  - Code quality: unnecessary duplication, missed reuse of existing utilities,
-    overly complex implementations a senior engineer would care about.
-  - Guideline compliance: clear, unambiguous violations of CLAUDE.md / REVIEW.md
-    where you can cite the exact rule broken. Respect all skip rules.
-
-Step 3: Validate each candidate finding
-  - Trace the actual code path to confirm the issue is real.
-  - Check whether the issue is handled elsewhere (try/catch, upstream guard,
-    fallback logic, type system guarantees).
-  - If validation fails, drop the finding silently.
-  - If validation passes, write a clear reasoning chain — this becomes the
-    \`reasoning\` field.
-
-Step 4: Classify each validated finding with exactly one severity:
-
-  important — A bug that should be fixed before merging. Build failures, clear
-    logic errors, security vulnerabilities with exploit paths, data loss risks,
-    race conditions with observable consequences.
-
-  nit — A minor issue worth fixing but non-blocking. Style deviations from
-    project guidelines, code quality concerns, unlikely-but-worth-noting edge
-    cases, convention violations that don't affect correctness.
-
-  pre_existing — A bug that exists in the surrounding codebase but was NOT
-    introduced by this change. Only flag when directly relevant to the changed
-    code path.
+export const CURSOR_REVIEW_PROMPT = `# Code Review
+
+## Your role
+You are a senior engineer reviewing a code change. Find the bugs a maintainer
+would fix before merging — logic errors, regressions, broken edge cases,
+security holes with a real exploit path, data-loss risks. You are not a linter
+or style checker. Optimize for findings that get resolved, not for volume: a
+few real, well-evidenced bugs beat a long list of nits.
+
+## Method — investigate before you report
+You have tools. Use them. Do not judge the diff in isolation:
+  - Read the full function and module around each change, not just the hunk.
+  - Trace call sites and data flow to see how the changed code is actually used.
+  - Read any repo guidance and honor it if present: CLAUDE.md, REVIEW.md,
+    AGENTS.md, .cursor/rules/*, and .cursor/BUGBOT.md (root and any nested under
+    the directories you're reviewing). Treat these as authoritative for this
+    repo and respect any skip/ignore rules they define.
+  - Before reporting, check whether the issue is already handled elsewhere (a
+    guard, try/catch, fallback, type guarantee).
+  - Look at sibling code and tests: is the pattern you're flagging used safely
+    elsewhere? Is the failure path you fear already covered?
+
+## What to look for
+  - Correctness: logic errors, regressions, broken edge cases, off-by-one,
+    wrong results, build/type breakage.
+  - Robustness: unhandled errors on realistic paths, resource/process leaks,
+    races with observable consequences, state left inconsistent on failure.
+  - Security (only when the change introduces it): execution of
+    user/client-controlled input, trusting unvalidated external or model
+    output, weakened trust/read-only guarantees, newly exposed surfaces.
+  - Contracts & attribution: wrong file/line/path mapping, scope confusion, or a
+    change to one side of a contract (types, API shape, parallel
+    implementations) without the matching change on the other side.
+  - Clear, citable violations of the repo guidance files above.
+
+## Validate every finding
+Confirm each candidate is real by tracing the actual code path. If you can't
+show how it triggers, drop it — prefer silence over a false positive. The
+confirmation you write becomes the \`reasoning\` field: what triggers it, what
+breaks, and why it isn't already handled.
+
+## Severity — report what a maintainer would actually fix
+  important — should be fixed before merge: data loss, silent wrong results,
+    security with an exploit path, crashes/leaks, contract drift that breaks a
+    real consumer, fail-open where it must fail-closed.
+
+  nit — minor and non-blocking: a real but low-impact edge case, missing
+    handling on an unlikely path, a robustness gap with an easy workaround.
+
+  pre_existing — a genuine bug in surrounding code NOT introduced by this
+    change. Only flag when it directly affects the changed code path.
+
+## Do NOT report
+  - Formatting, naming, or style preferences (unless a repo guideline requires).
+  - "Consider adding tests" — unless the change adds non-trivial logic with no
+    test coverage at all.
+  - Hypothetical problems needing exotic conditions with no real code path.
+  - Speculative refactors or "this could be cleaner" outside the change's scope.
 
 ## Hard constraints
-- Never approve or block the change.
-- Never comment on formatting or code style unless guidance files say to.
-- Never flag missing test coverage unless guidance files say to.
-- Never invent rules — only enforce what CLAUDE.md or REVIEW.md state.
-- Prefer silence over false positives — when in doubt, drop the finding.
-- Do NOT modify files. This is a read-only review.
+- This is a read-only review. Do NOT modify files.
 - Do NOT post any comments to GitHub or GitLab. Do NOT use gh pr comment, glab,
   or any commenting tool.
+- Never approve or block the change. Your only output is findings.
 
 ## Output contract
 Your only machine-readable output is a single marker-delimited JSON block.
@@ -208,14 +218,17 @@ Schema:
   - line: integer (start line, post-change numbering)
   - end_line: integer (end line; equal to line for a single line)
   - severity: one of "important", "nit", "pre_existing"
-  - description: string (one paragraph)
+  - description: string (one paragraph) — state the IMPACT (what breaks, for
+    whom) and the TRIGGER (when it happens); suggest a minimal fix if obvious
   - reasoning: string (how the issue was confirmed)
 - summary: object with
   - correctness: string ("Correct" or "Issues Found")
   - explanation: string (one sentence)
   - confidence: number between 0 and 1
 
-If no issues are found, return an empty "findings" array with a valid summary.`;
+Cite file/line from the new (post-change) code. One finding per distinct bug —
+do not stack unrelated issues. If no issues are found, return an empty
+"findings" array with a valid summary.`;
 
 // ---------------------------------------------------------------------------
 // Command builder

From dce4006449f0659e90947b81615e6cf99b859861 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Sat, 13 Jun 2026 20:29:22 -0700
Subject: [PATCH 6/7] feat(review): discover Cursor models dynamically from
 `agent models`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the hardcoded single-entry Cursor model list with the real,
account-specific catalog. `agent models` returns ~139 models (current ones:
Opus 4.8, GPT-5.5, Fable 5, Gemini 3.1, Grok, etc.) in a stable `id - Label`
format — hardcoding any subset would go stale immediately.

- cursor-review.ts: parseCursorModelsOutput() — runtime-agnostic parser for the
  `agent models` text (skips header/tip, dedupes). Shared, vendored to Pi.
- agent-jobs.ts (Bun + Pi): discoverCursorModels() spawns `agent models` at
  review-server capability detection (best-effort, cached; empty when the CLI is
  unauthenticated) and attaches the catalog to the cursor AgentCapability.
- AgentCapability gains an optional `models` field (deliberate, per approach
  doc — not smuggled). Cursor capability detection now also gated to review mode.
- UI: AgentsTab drives the model picker from capabilities.cursor.models, falling
  back to an `auto`-only list when none are reported. Default model id is now
  `auto` (lowercase, matching the real catalog); buildCursorCommand omits
  --model for `auto` case-insensitively.

Verified the parser against the live CLI: parses all 139 models. Typecheck
green; 61 tests pass (incl. 3 new parser tests). Binary rebuilt.
---
 apps/pi-extension/server/agent-jobs.ts | 33 +++++++++++++++++++---
 packages/server/agent-jobs.ts          | 34 ++++++++++++++++++++---
 packages/server/cursor-review.test.ts  | 37 +++++++++++++++++++++++++
 packages/server/cursor-review.ts       | 38 +++++++++++++++++++++++++-
 packages/shared/agent-jobs.ts          |  7 +++++
 packages/ui/components/AgentsTab.tsx   | 30 ++++++++++++++------
 packages/ui/hooks/useAgentSettings.ts  |  4 ++-
 7 files changed, 164 insertions(+), 19 deletions(-)

diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts
index e3742fba4..24f91b123 100644
--- a/apps/pi-extension/server/agent-jobs.ts
+++ b/apps/pi-extension/server/agent-jobs.ts
@@ -21,7 +21,7 @@ import {
 	AGENT_HEARTBEAT_INTERVAL_MS,
 } from "../generated/agent-jobs.js";
 import { formatClaudeLogEvent } from "../generated/claude-review.js";
-import { formatCursorLogEvent } from "../generated/cursor-review.js";
+import { formatCursorLogEvent, parseCursorModelsOutput, type CursorModel } from "../generated/cursor-review.js";
 import { json, parseBody } from "./helpers.js";
 
 // ---------------------------------------------------------------------------
@@ -116,6 +116,24 @@ function isCursorAgentAvailable(): boolean {
 	}
 }
 
+/**
+ * Best-effort Cursor model catalog from `agent models`, parsed once. Empty when
+ * discovery fails or the CLI is unauthenticated — the UI falls back to an
+ * `auto`-only picker. Account-specific, so never hardcoded.
+ */
+function discoverCursorModels(): CursorModel[] {
+	try {
+		const out = execFileSync("agent", ["models"], {
+			timeout: 5000,
+			stdio: ["ignore", "pipe", "ignore"],
+			encoding: "utf8",
+		});
+		return parseCursorModelsOutput(out);
+	} catch {
+		return [];
+	}
+}
+
 export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 	const { mode, getServerUrl, getCwd } = options;
 
@@ -126,13 +144,20 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 	let version = 0;
 
 	// --- Capability detection (run once) ---
+	// Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
+	// identity rather than trusting the name alone. When present, also discover
+	// its account-specific model catalog so the UI doesn't hardcode model ids.
+	const cursorAvailable = mode === "review" && isCursorAgentAvailable();
 	const capabilities: AgentCapability[] = [
 		{ id: "claude", name: "Claude Code", available: whichCmd("claude") },
 		{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
 		{ id: "tour", name: "Code Tour", available: whichCmd("claude") || whichCmd("codex") },
-		// Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
-		// identity rather than trusting the name alone (see isCursorAgentAvailable).
-		{ id: "cursor", name: "Cursor CLI", available: isCursorAgentAvailable() },
+		{
+			id: "cursor",
+			name: "Cursor CLI",
+			available: cursorAvailable,
+			...(cursorAvailable ? { models: discoverCursorModels() } : {}),
+		},
 	];
 	const capabilitiesResponse: AgentCapabilities = {
 		mode,
diff --git a/packages/server/agent-jobs.ts b/packages/server/agent-jobs.ts
index 9750c7432..653aeef97 100644
--- a/packages/server/agent-jobs.ts
+++ b/packages/server/agent-jobs.ts
@@ -9,7 +9,7 @@
  */
 
 import { formatClaudeLogEvent } from "./claude-review";
-import { formatCursorLogEvent } from "./cursor-review";
+import { formatCursorLogEvent, parseCursorModelsOutput, type CursorModel } from "./cursor-review";
 import {
   type AgentJobInfo,
   type AgentJobEvent,
@@ -131,6 +131,25 @@ function isCursorAgentAvailable(): boolean {
   }
 }
 
+/**
+ * Best-effort Cursor model catalog from `agent models`, parsed once and cached.
+ * Empty when discovery fails or the CLI is unauthenticated — the UI falls back
+ * to an `auto`-only picker. Account-specific, so never hardcoded.
+ */
+function discoverCursorModels(): CursorModel[] {
+  try {
+    const res = Bun.spawnSync(["agent", "models"], {
+      stdout: "pipe",
+      stderr: "ignore",
+      timeout: 5000,
+    });
+    if (!res.success) return [];
+    return parseCursorModelsOutput(new TextDecoder().decode(res.stdout));
+  } catch {
+    return [];
+  }
+}
+
 export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJobHandler {
   const { mode, getServerUrl, getCwd } = options;
 
@@ -142,13 +161,20 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
   let version = 0;
 
   // --- Capability detection (run once) ---
+  // Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
+  // identity rather than trusting the name alone. When present, also discover
+  // its account-specific model catalog so the UI doesn't hardcode model ids.
+  const cursorAvailable = mode === "review" && isCursorAgentAvailable();
   const capabilities: AgentCapability[] = [
     { id: "claude", name: "Claude Code", available: !!Bun.which("claude") },
     { id: "codex", name: "Codex CLI", available: !!Bun.which("codex") },
     { id: "tour", name: "Code Tour", available: !!Bun.which("claude") || !!Bun.which("codex") },
-    // Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
-    // identity rather than trusting the name alone (see isCursorAgentAvailable).
-    { id: "cursor", name: "Cursor CLI", available: isCursorAgentAvailable() },
+    {
+      id: "cursor",
+      name: "Cursor CLI",
+      available: cursorAvailable,
+      ...(cursorAvailable ? { models: discoverCursorModels() } : {}),
+    },
   ];
   const capabilitiesResponse: AgentCapabilities = {
     mode,
diff --git a/packages/server/cursor-review.test.ts b/packages/server/cursor-review.test.ts
index 1cf501f2b..c1feb0cb4 100644
--- a/packages/server/cursor-review.test.ts
+++ b/packages/server/cursor-review.test.ts
@@ -8,6 +8,7 @@ import {
   formatCursorLogEvent,
   transformCursorFindings,
   validateCursorReviewOutput,
+  parseCursorModelsOutput,
   type CursorFinding,
   type CursorReviewOutput,
 } from "./cursor-review";
@@ -315,3 +316,39 @@ describe("transformCursorFindings", () => {
     expect(transformCursorFindings(f, "src", "/repo")[0].lineEnd).toBe(7);
   });
 });
+
+describe("parseCursorModelsOutput", () => {
+  test("parses `id - Label` lines and skips the header + tip", () => {
+    const out = [
+      "Available models",
+      "",
+      "auto - Auto",
+      "gpt-5.2 - GPT-5.2",
+      "claude-opus-4-8-thinking-high - Opus 4.8 1M Thinking",
+      "composer-2.5 - Composer 2.5 (current)",
+      "",
+      "Tip: use --model <id> (or /model <id> in interactive mode) to switch.",
+    ].join("\n");
+    const models = parseCursorModelsOutput(out);
+    expect(models).toEqual([
+      { id: "auto", label: "Auto" },
+      { id: "gpt-5.2", label: "GPT-5.2" },
+      { id: "claude-opus-4-8-thinking-high", label: "Opus 4.8 1M Thinking" },
+      { id: "composer-2.5", label: "Composer 2.5 (current)" },
+    ]);
+  });
+
+  test("returns [] for unauthenticated / empty output", () => {
+    expect(parseCursorModelsOutput("No models available for this account.")).toEqual([]);
+    expect(parseCursorModelsOutput("")).toEqual([]);
+    expect(parseCursorModelsOutput("Not logged in")).toEqual([]);
+  });
+
+  test("dedupes repeated ids, keeping first", () => {
+    const models = parseCursorModelsOutput("auto - Auto\nauto - Auto Again\ngpt-5.2 - GPT-5.2");
+    expect(models).toEqual([
+      { id: "auto", label: "Auto" },
+      { id: "gpt-5.2", label: "GPT-5.2" },
+    ]);
+  });
+});
diff --git a/packages/server/cursor-review.ts b/packages/server/cursor-review.ts
index 9653939c6..19e9e0ce3 100644
--- a/packages/server/cursor-review.ts
+++ b/packages/server/cursor-review.ts
@@ -253,7 +253,8 @@ export interface CursorCommandResult {
  * to the launch cwd when provided, matching the spawn cwd.
  */
 export function buildCursorCommand(prompt: string, model?: string, cwd?: string): CursorCommandResult {
-  const useModel = !!model && model !== "Auto";
+  // `auto` is Cursor's default model id — omit --model so the CLI chooses.
+  const useModel = !!model && model.toLowerCase() !== "auto";
 
   return {
     command: [
@@ -275,6 +276,41 @@ export function buildCursorCommand(prompt: string, model?: string, cwd?: string)
   };
 }
 
+// ---------------------------------------------------------------------------
+// Model discovery — parse `agent models` output. The spawn is runtime-specific
+// (it lives in each server's agent-jobs adapter); this pure parser is shared.
+// ---------------------------------------------------------------------------
+
+export interface CursorModel {
+  id: string;
+  label: string;
+}
+
+/**
+ * Parse the text output of `agent models` / `agent --list-models` into a model
+ * catalog. The CLI prints one model per line as `<id> - <Label>`, wrapped by an
+ * "Available models" header and a "Tip: ..." footer. Returns [] when the output
+ * carries no model lines (e.g. unauthenticated: "No models available...").
+ */
+export function parseCursorModelsOutput(stdout: string): CursorModel[] {
+  if (!stdout) return [];
+  const models: CursorModel[] = [];
+  const seen = new Set<string>();
+  for (const rawLine of stdout.split("\n")) {
+    const line = rawLine.trim();
+    // `id - Label` — id is a single whitespace-free token; the separator is
+    // " - " with surrounding spaces (model ids contain hyphens but never " - ").
+    const match = /^(\S+)\s+-\s+(.+)$/.exec(line);
+    if (!match) continue;
+    const id = match[1];
+    const label = match[2].trim();
+    if (!id || seen.has(id)) continue;
+    seen.add(id);
+    models.push({ id, label });
+  }
+  return models;
+}
+
 // ---------------------------------------------------------------------------
 // Stream reduction — line-buffered NDJSON reducer.
 // ---------------------------------------------------------------------------
diff --git a/packages/shared/agent-jobs.ts b/packages/shared/agent-jobs.ts
index 1cfedf391..a4cc9ccf1 100644
--- a/packages/shared/agent-jobs.ts
+++ b/packages/shared/agent-jobs.ts
@@ -81,6 +81,13 @@ export interface AgentCapability {
   id: string;
   name: string;
   available: boolean;
+  /**
+   * Provider-discovered model catalog (currently only Cursor). Best-effort and
+   * account-specific — populated from the provider CLI at capability-detection
+   * time, empty when discovery fails or the CLI is unauthenticated. The UI
+   * drives its model picker from this instead of a hardcoded list.
+   */
+  models?: { id: string; label: string }[];
 }
 
 export interface AgentCapabilities {
diff --git a/packages/ui/components/AgentsTab.tsx b/packages/ui/components/AgentsTab.tsx
index aa1b0e146..a7e4a9195 100644
--- a/packages/ui/components/AgentsTab.tsx
+++ b/packages/ui/components/AgentsTab.tsx
@@ -68,10 +68,13 @@ const TOUR_CLAUDE_MODELS: Array<{ value: string; label: string }> = [
   { value: 'opus', label: 'Opus (thorough)' },
 ];
 
-// Cursor v1 minimal catalog — just Auto. Model discovery waits on a
-// provider-metadata contract (see approach doc § Model Discovery).
+// Fallback Cursor model catalog (just `auto`). The real, account-specific list
+// is discovered server-side via `agent models` and delivered on the cursor
+// capability; the component prefers that and only falls back to this when the
+// server reports no models (e.g. unauthenticated CLI). Used by formatModel for
+// job-card labels where the live list isn't threaded.
 const CURSOR_MODELS: Array<{ value: string; label: string }> = [
-  { value: 'Auto', label: 'Auto' },
+  { value: 'auto', label: 'Auto' },
 ];
 
 const MODE_LABEL: Record<AgentMode, string> = {
@@ -418,6 +421,15 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
   const tourAvailable = capabilities?.providers.some((p) => p.id === 'tour' && p.available) ?? false;
   const cursorAvailable = capabilities?.providers.some((p) => p.id === 'cursor' && p.available) ?? false;
 
+  // Cursor's model catalog is account-specific and discovered server-side, so
+  // prefer the live list from the capability; fall back to `auto`-only when the
+  // server reports none (e.g. unauthenticated CLI).
+  const cursorModels = useMemo<Array<{ value: string; label: string }>>(() => {
+    const discovered = capabilities?.providers.find((p) => p.id === 'cursor')?.models ?? [];
+    const opts = discovered.map((m) => ({ value: m.id, label: m.label }));
+    return opts.length > 0 ? opts : CURSOR_MODELS;
+  }, [capabilities]);
+
   // Tour engines (narrow union). Cursor is NOT included here — it is review-only.
   const availableEngines = useMemo<AgentEngine[]>(() => {
     const engines: AgentEngine[] = [];
@@ -504,12 +516,12 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
       return { provider: 'claude', label: 'Code Review', model: claudeModel, effort: claudeEffort };
     }
     if (engine === 'cursor') {
-      // Omission ⇒ Auto: drop model client-side when Auto so the POST carries
-      // no model and the server applies Cursor's default behavior.
+      // Omission ⇒ auto: drop the model client-side when it's `auto` so the POST
+      // carries no model and the server lets Cursor pick its default.
       return {
         provider: 'cursor',
         label: 'Code Review',
-        ...(cursorModel && cursorModel !== 'Auto' ? { model: cursorModel } : {}),
+        ...(cursorModel && cursorModel.toLowerCase() !== 'auto' ? { model: cursorModel } : {}),
       };
     }
     return {
@@ -630,10 +642,10 @@ export const AgentsTab: React.FC<AgentsTabProps> = ({
                       <span className="text-muted-foreground/50">Findings are prompt-enforced</span>
                     </div>
                     <ConfigRow label="Model" stacked>
-                      {CURSOR_MODELS.length > 1 ? (
-                        <SelectMenu value={cursorModel} options={CURSOR_MODELS} onChange={setCursorModel} />
+                      {cursorModels.length > 1 ? (
+                        <SelectMenu value={cursorModel} options={cursorModels} onChange={setCursorModel} />
                       ) : (
-                        renderStaticChoice(catalogLabel(CURSOR_MODELS, cursorModel))
+                        renderStaticChoice(catalogLabel(cursorModels, cursorModel))
                       )}
                     </ConfigRow>
                   </>
diff --git a/packages/ui/hooks/useAgentSettings.ts b/packages/ui/hooks/useAgentSettings.ts
index 37fbc00f0..e10821914 100644
--- a/packages/ui/hooks/useAgentSettings.ts
+++ b/packages/ui/hooks/useAgentSettings.ts
@@ -13,7 +13,9 @@ export const DEFAULT_TOUR_CLAUDE_EFFORT = 'medium';
 export const DEFAULT_TOUR_CODEX_MODEL = 'gpt-5.3-codex';
 export const DEFAULT_TOUR_CODEX_REASONING = 'medium';
 export const DEFAULT_TOUR_CODEX_FAST = false;
-export const DEFAULT_CURSOR_MODEL = 'Auto';
+// `auto` is Cursor's own default model id (from `agent models`); lowercase so it
+// matches the discovered catalog and the buildCursorCommand omit-`--model` check.
+export const DEFAULT_CURSOR_MODEL = 'auto';
 
 interface ClaudeSection {
   model: string;

From bdba131e69771bfc12c48265544adb1fb23ebe51 Mon Sep 17 00:00:00 2001
From: Michael Ramos <mdramos8@gmail.com>
Date: Tue, 23 Jun 2026 06:47:56 -0700
Subject: [PATCH 7/7] refactor(review): drop the Cursor identity probe, just
 check PATH like Claude/Codex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes isCursorAgentAvailable() (the `agent about --format json` identity
check) from both runtimes. Cursor availability is now `which agent`, matching
how Claude (`which claude`) and Codex (`which codex`) are detected — less code,
consistent, and it removes the keychain/auth coupling that hid Cursor on
headless/SSH/API-key sessions where `agent about` fails but `agent -p` works.
Model discovery (`agent models`) is unchanged.
---
 apps/pi-extension/server/agent-jobs.ts | 29 +++--------------------
 packages/server/agent-jobs.ts          | 32 +++-----------------------
 2 files changed, 6 insertions(+), 55 deletions(-)

diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts
index 24f91b123..f426826ac 100644
--- a/apps/pi-extension/server/agent-jobs.ts
+++ b/apps/pi-extension/server/agent-jobs.ts
@@ -94,28 +94,6 @@ export interface AgentJobHandlerOptions {
 	onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
 }
 
-/**
- * `agent` is a very generic binary name — other tools ship binaries called
- * `agent` too, so finding one on PATH does not prove it is Cursor. Verify its
- * identity before offering Cursor as a provider. Cheap one-time probe: `agent
- * about --format json` is fast, works unauthenticated, and only the Cursor CLI
- * emits a JSON object with a `cliVersion` field. Any failure → not Cursor.
- */
-function isCursorAgentAvailable(): boolean {
-	if (!whichCmd("agent")) return false;
-	try {
-		const out = execFileSync("agent", ["about", "--format", "json"], {
-			timeout: 3000,
-			stdio: ["ignore", "pipe", "ignore"],
-			encoding: "utf8",
-		});
-		const parsed = JSON.parse(out) as { cliVersion?: unknown };
-		return parsed != null && typeof parsed.cliVersion === "string";
-	} catch {
-		return false;
-	}
-}
-
 /**
  * Best-effort Cursor model catalog from `agent models`, parsed once. Empty when
  * discovery fails or the CLI is unauthenticated — the UI falls back to an
@@ -144,10 +122,9 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
 	let version = 0;
 
 	// --- Capability detection (run once) ---
-	// Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
-	// identity rather than trusting the name alone. When present, also discover
-	// its account-specific model catalog so the UI doesn't hardcode model ids.
-	const cursorAvailable = mode === "review" && isCursorAgentAvailable();
+	// Cursor CLI's binary is literally named `agent` (NOT `cursor`). When present,
+	// discover its account-specific model catalog so the UI doesn't hardcode ids.
+	const cursorAvailable = mode === "review" && whichCmd("agent");
 	const capabilities: AgentCapability[] = [
 		{ id: "claude", name: "Claude Code", available: whichCmd("claude") },
 		{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
diff --git a/packages/server/agent-jobs.ts b/packages/server/agent-jobs.ts
index 653aeef97..c684580eb 100644
--- a/packages/server/agent-jobs.ts
+++ b/packages/server/agent-jobs.ts
@@ -105,31 +105,6 @@ export interface AgentJobHandlerOptions {
   onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
 }
 
-/**
- * `agent` is a very generic binary name — other tools ship binaries called
- * `agent` too, so finding one on PATH does not prove it is Cursor. Confirm its
- * identity before offering Cursor as a provider; otherwise selecting Cursor
- * could spawn an unrelated tool with review content as an argument.
- *
- * Cheap one-time probe: `agent about --format json` is fast, works
- * unauthenticated, and only the Cursor CLI emits a JSON object with a
- * `cliVersion` field. Any failure/timeout/mismatch → treat as not Cursor.
- */
-function isCursorAgentAvailable(): boolean {
-  if (!Bun.which("agent")) return false;
-  try {
-    const res = Bun.spawnSync(["agent", "about", "--format", "json"], {
-      stdout: "pipe",
-      stderr: "ignore",
-      timeout: 3000,
-    });
-    if (!res.success) return false;
-    const parsed = JSON.parse(new TextDecoder().decode(res.stdout)) as { cliVersion?: unknown };
-    return parsed != null && typeof parsed.cliVersion === "string";
-  } catch {
-    return false;
-  }
-}
 
 /**
  * Best-effort Cursor model catalog from `agent models`, parsed once and cached.
@@ -161,10 +136,9 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob
   let version = 0;
 
   // --- Capability detection (run once) ---
-  // Cursor CLI's binary is literally named `agent` (NOT `cursor`), so verify
-  // identity rather than trusting the name alone. When present, also discover
-  // its account-specific model catalog so the UI doesn't hardcode model ids.
-  const cursorAvailable = mode === "review" && isCursorAgentAvailable();
+  // Cursor CLI's binary is literally named `agent` (NOT `cursor`). When present,
+  // discover its account-specific model catalog so the UI doesn't hardcode ids.
+  const cursorAvailable = mode === "review" && !!Bun.which("agent");
   const capabilities: AgentCapability[] = [
     { id: "claude", name: "Claude Code", available: !!Bun.which("claude") },
     { id: "codex", name: "Codex CLI", available: !!Bun.which("codex") },