diff --git a/.task b/.task index 4f33323..d3a31c8 100644 --- a/.task +++ b/.task @@ -1,10 +1,10 @@ { - "taskId": "1282", + "taskId": "2123", "phase": "execution", - "fenceToken": 3, - "sessionId": "c893aa20-a7b7-4112-9cc3-68c12a747bf1", - "journalPath": "/tmp/taskcore-worktrees/journal-T1282/tasks/T1282/", - "codeWorktree": "/tmp/taskcore-worktrees/code-T1282", - "claimedAt": 1773098423616, + "fenceToken": 10, + "sessionId": "7be3c6a7-358e-45c7-b5af-6c1c5a51b82e", + "journalPath": "/tmp/taskcore-worktrees/journal-T2123/tasks/T2123/", + "codeWorktree": "/tmp/taskcore-worktrees/code-T2123", + "claimedAt": 1773404837466, "reviewNotes": [] } diff --git a/core/cli/plan-parse.ts b/core/cli/plan-parse.ts new file mode 100644 index 0000000..1da2167 --- /dev/null +++ b/core/cli/plan-parse.ts @@ -0,0 +1,258 @@ +/** + * plan-parse.ts — parse markdown-ish plans into child task specs and allocate costs. + * + * Supports: + * - Checklist items: `- [ ] title` or `- [x] title` + * - Ordered items: `1. title` + * - Bullet items: `- title` + * - Headings: `# Section` — prefix subsequent items' titles with "Section: " + * - Continuation lines: indented (2+ spaces) lines after an item enrich its description + * - Trailing meta: `(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true)` + */ + +export interface ParsedItem { + title: string; + description: string; + cost: number | undefined; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +export interface AllocatedItem { + title: string; + description: string; + cost: number; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +interface ItemMeta { + cost?: number; + assignee?: string; + reviewer?: string; + skipAnalysis?: boolean; +} + +function extractMeta(raw: string): { baseTitle: string; meta: ItemMeta } { + // Match trailing parenthetical: `title (key: val, key: val)` + const parenMatch = raw.match(/^(.*?)\s*\(([^)]+)\)\s*$/); + if (!parenMatch) return { baseTitle: raw.trim(), meta: {} }; + + const baseTitle = parenMatch[1]!.trim(); + const metaStr = parenMatch[2]!; + const meta: ItemMeta = {}; + + for (const pair of metaStr.split(",")) { + const colonIdx = pair.indexOf(":"); + if (colonIdx < 0) continue; + const key = pair.slice(0, colonIdx).trim().toLowerCase(); + const value = pair.slice(colonIdx + 1).trim(); + + switch (key) { + case "cost": { + const n = Number(value); + if (Number.isFinite(n) && n > 0) meta.cost = n; + break; + } + case "assignee": + if (value) meta.assignee = value; + break; + case "reviewer": + if (value) meta.reviewer = value; + break; + case "skip-analysis": + meta.skipAnalysis = value === "true" || value === "1" || value === "yes"; + break; + } + } + return { baseTitle, meta }; +} + +interface PendingItem { + title: string; + rawTitle: string; // item text without heading prefix, used as default description + cost: number | undefined; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +/** + * Parse a markdown-ish plan text into ParsedItem[]. + * + * Item markers: + * - `- [ ] title` / `- [x] title` — checklist + * - `1. title` — ordered list + * - `- title` — plain bullet + * + * Headings (`# …`) set a context prefix applied to all following items until + * the next heading. + * + * Indented lines (2+ spaces) following an item are appended to its description. + * + * A trailing parenthetical `(key: value, …)` on any item line supplies metadata. + */ +export function parsePlan(text: string): ParsedItem[] { + const lines = text.split("\n"); + const items: ParsedItem[] = []; + let headingContext = ""; + let pending: PendingItem | null = null; + const descLines: string[] = []; + + function flush(): void { + if (!pending) return; + const description = descLines.length > 0 ? descLines.join("\n") : pending.rawTitle; + items.push({ + title: pending.title, + description, + cost: pending.cost, + assignee: pending.assignee, + reviewer: pending.reviewer, + skipAnalysis: pending.skipAnalysis, + }); + pending = null; + descLines.length = 0; + } + + for (const line of lines) { + const trimmed = line.trim(); + + // Blank lines — skip (don't flush; a blank between item and continuation is fine) + if (trimmed === "") continue; + + // Heading: reset context prefix + const headingMatch = trimmed.match(/^#{1,6}\s+(.+)$/); + if (headingMatch) { + flush(); + headingContext = headingMatch[1]!.trim(); + continue; + } + + // Checklist: `- [ ] title` or `- [x] title` (any char inside brackets) + const checklistMatch = trimmed.match(/^-\s+\[[^\]]*\]\s+(.+)$/); + // Ordered: `1. title` (must not have already matched checklist) + const orderedMatch = !checklistMatch ? trimmed.match(/^\d+\.\s+(.+)$/) : null; + // Bullet: `- title` (only when neither of the above matched) + const bulletMatch = !checklistMatch && !orderedMatch ? trimmed.match(/^-\s+(.+)$/) : null; + + const itemText = checklistMatch?.[1] ?? orderedMatch?.[1] ?? bulletMatch?.[1]; + + if (itemText !== undefined) { + flush(); + const { baseTitle, meta } = extractMeta(itemText); + const prefix = headingContext ? `${headingContext}: ` : ""; + pending = { + title: prefix + baseTitle, + rawTitle: baseTitle, + cost: meta.cost, + assignee: meta.assignee, + reviewer: meta.reviewer, + skipAnalysis: meta.skipAnalysis ?? false, + }; + continue; + } + + // Continuation: indented line (2+ spaces or a tab) while we have a pending item + if (pending && /^[ \t]{2,}/.test(line)) { + descLines.push(trimmed); + continue; + } + + // Unrecognised line — flush pending (don't silently swallow item boundaries) + flush(); + } + + flush(); + return items; +} + +/** + * Allocate costs to items. Items with an explicit `cost` keep it; items without + * share the remaining budget evenly. Uses integer arithmetic (cents) to avoid + * float drift. + * + * Returns an error string on failure, or the allocated items on success. + */ +export function allocateCosts( + items: ParsedItem[], + budgetRemainingDollars: number, +): { ok: true; items: AllocatedItem[] } | { ok: false; error: string } { + if (items.length === 0) { + return { ok: false, error: "plan contains no items" }; + } + + // Work in integer cents to avoid float drift + const budgetCents = Math.round(budgetRemainingDollars * 100); + if (budgetCents <= 0) { + return { + ok: false, + error: `no positive budget remaining (${budgetRemainingDollars.toFixed(2)})`, + }; + } + + let specifiedCents = 0; + let unspecifiedCount = 0; + for (const item of items) { + if (item.cost !== undefined) { + specifiedCents += Math.round(item.cost * 100); + } else { + unspecifiedCount++; + } + } + + if (specifiedCents > budgetCents) { + return { + ok: false, + error: + `explicit costs (${(specifiedCents / 100).toFixed(2)}) exceed remaining budget` + + ` (${budgetRemainingDollars.toFixed(2)})`, + }; + } + + const remainingCents = budgetCents - specifiedCents; + let perItemCents = 0; + let extraCents = 0; + + if (unspecifiedCount > 0) { + if (remainingCents <= 0) { + return { + ok: false, + error: `no budget remains for ${unspecifiedCount} item(s) without explicit cost`, + }; + } + perItemCents = Math.floor(remainingCents / unspecifiedCount); + extraCents = remainingCents % unspecifiedCount; + if (perItemCents === 0) { + return { + ok: false, + error: + `remaining budget (${(remainingCents / 100).toFixed(2)}) too small to distribute` + + ` across ${unspecifiedCount} uncosted item(s)`, + }; + } + } + + // Distribute extra cents to the first items (deterministic, no float drift) + let extraGiven = 0; + const allocated: AllocatedItem[] = items.map((item) => { + let costCents: number; + if (item.cost !== undefined) { + costCents = Math.round(item.cost * 100); + } else { + costCents = perItemCents + (extraGiven < extraCents ? 1 : 0); + if (extraGiven < extraCents) extraGiven++; + } + return { + title: item.title, + description: item.description, + cost: costCents / 100, + assignee: item.assignee, + reviewer: item.reviewer, + skipAnalysis: item.skipAnalysis, + }; + }); + + return { ok: true, items: allocated }; +} diff --git a/core/cli/task.ts b/core/cli/task.ts index 19192a2..9d71699 100644 --- a/core/cli/task.ts +++ b/core/cli/task.ts @@ -11,6 +11,7 @@ import { filterClaimableTasks, includeReviewQueueTask, } from "./claimability.js"; +import { allocateCosts, parsePlan } from "./plan-parse.js"; // Handle EPIPE errors gracefully (e.g., when piping to head) process.stdout.on("error", (err: NodeJS.ErrnoException) => { @@ -535,7 +536,9 @@ function phaseGuidance(phase: string, condition: string): string[] { case "decomposition.active": return [ "You're decomposing this task. Next steps:", - " task decompose start # begin decomposition", + " task plan --file plan.md # materialize a whole checklist in one shot", + " task decompose plan --stdin # same flow, reading markdown from stdin", + " task decompose start # fallback: build children incrementally", " task decompose add <...> # add a child task", " task decompose commit # finalize children", " task decompose cancel # cancel decomposition", @@ -768,7 +771,8 @@ function printHelp(): void { " task update ", " task analyze", " task decide ", - " task decompose ...", + " task plan | --file | --stdin # plan-based decomposition", + " task decompose ...", " task review ...", " task journal ...", " task worktree", @@ -1520,10 +1524,12 @@ async function cmdClaim(argv: string[], jsonMode: boolean): Promise { process.stdout.write(" task review reject --force — reject permanently (prefer request-changes)\n"); process.stdout.write(" task review request-changes \"notes\" — request changes\n"); } else if (phase === "decomposition") { - process.stdout.write(" task decompose start — begin decomposition\n"); - process.stdout.write(" task decompose add \"...\" — add a child task\n"); - process.stdout.write(" task decompose commit — finalize children\n"); - process.stdout.write(" task decompose cancel — cancel decomposition\n"); + process.stdout.write(" task plan --file plan.md — commit a whole checklist in one shot\n"); + process.stdout.write(" task decompose plan --stdin — same flow, reading markdown from stdin\n"); + process.stdout.write(" task decompose start — fallback: begin incremental decomposition\n"); + process.stdout.write(" task decompose add \"...\" — add a child task\n"); + process.stdout.write(" task decompose commit — finalize children\n"); + process.stdout.write(" task decompose cancel — cancel decomposition\n"); } else { process.stdout.write(" task submit \"what you did\" — when done\n"); process.stdout.write(" task complete \"evidence\" — done, no review needed\n"); @@ -2005,9 +2011,221 @@ async function cmdDecompose(argv: string[], jsonMode: boolean): Promise { return; } + case "plan": { + const { flags } = parseFlags(args); + + // Read plan text from --items, --file, or --stdin + let planText: string; + const itemsFlag = getFlagString(flags, "items"); + const fileFlag = getFlagString(flags, "file"); + const useStdin = getFlagBool(flags, "stdin"); + + if (itemsFlag !== undefined) { + planText = itemsFlag; + } else if (fileFlag !== undefined) { + if (!fs.existsSync(fileFlag)) { + throw new CliError(`File not found: ${fileFlag}`, 1); + } + planText = fs.readFileSync(fileFlag, "utf-8"); + } else if (useStdin) { + planText = fs.readFileSync(0, "utf-8"); + } else { + throw new CliError( + "Usage: task decompose plan --items | --file | --stdin\n" + + " [--strategy sequential|parallel]", + 1, + ); + } + + const strategyRaw = getFlagString(flags, "strategy") ?? "sequential"; + if (strategyRaw !== "sequential" && strategyRaw !== "parallel") { + throw new CliError("--strategy must be 'sequential' or 'parallel'", 1); + } + const strategy = strategyRaw as "sequential" | "parallel"; + + // Parse the plan into items + const parsedItems = parsePlan(planText); + if (parsedItems.length === 0) { + throw new CliError( + "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.", + 1, + ); + } + + // Fetch parent task to determine remaining budget + const task = await getTask(taskId); + const costObj = asRecord(task["cost"]); + const budgetRemaining = costObj + ? (asNumber(costObj["allocated"]) ?? 0) + - (asNumber(costObj["consumed"]) ?? 0) + - (asNumber(costObj["childAllocated"]) ?? 0) + + (asNumber(costObj["childRecovered"]) ?? 0) + : 0; + + // Allocate costs (explicit + auto-distribution) + const allocation = allocateCosts(parsedItems, budgetRemaining); + if (!allocation.ok) { + throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1); + } + + // Build children array for the one-shot endpoint + const children = allocation.items.map((item) => { + const child: Record = { + title: item.title, + description: item.description, + costAllocation: item.cost, + }; + if (item.assignee) child["assignee"] = item.assignee; + if (item.reviewer) child["reviewer"] = item.reviewer; + if (item.skipAnalysis) child["skipAnalysis"] = true; + return child; + }); + + const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, { + children, + coordinationMode: strategy, + }); + + if (jsonMode) { + process.stdout.write(JSON.stringify(response, null, 2) + "\n"); + return; + } + + process.stdout.write("--- Decomposition committed ---\n\n"); + const responseChildren = asArray(response["children"]) + .map((c) => asRecord(c)) + .filter((c): c is Record => c !== null); + process.stdout.write(`Created ${responseChildren.length} children:\n`); + for (const child of responseChildren) { + process.stdout.write( + ` T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")} ${formatMoney(child["costAllocation"])}\n`, + ); + } + + const planAgentId = process.env["TASKCORE_AGENT_ID"]; + if (planAgentId) clearActiveTask(planAgentId); + return; + } + default: - throw new CliError("Usage: task decompose ...", 1); + throw new CliError("Usage: task decompose ...", 1); + } +} + +/** + * `task plan` — top-level shortcut for plan-based decomposition. + * + * Usage: + * task plan "- [ ] step one\n- [ ] step two" + * task plan --file plan.md + * task plan --stdin + * task plan "- step one" --strategy parallel + * + * Equivalent to `task decompose plan` but more natural for agents. + * Accepts plan text as a positional arg (most common), --file, or --stdin. + */ +async function cmdPlan(argv: string[], jsonMode: boolean): Promise { + requireAgentId(); + const taskId = currentTaskId(); + const { positionals, flags } = parseFlags(argv); + + // Read plan text from positional arg, --file, or --stdin + let planText: string; + const fileFlag = getFlagString(flags, "file"); + const useStdin = getFlagBool(flags, "stdin"); + + if (positionals.length > 0) { + planText = positionals.join(" "); + } else if (fileFlag !== undefined) { + if (!fs.existsSync(fileFlag)) { + throw new CliError(`File not found: ${fileFlag}`, 1); + } + planText = fs.readFileSync(fileFlag, "utf-8"); + } else if (useStdin) { + planText = fs.readFileSync(0, "utf-8"); + } else { + throw new CliError( + "Usage: task plan | --file | --stdin\n" + + " [--strategy sequential|parallel]\n" + + "\n" + + "Plan format:\n" + + " - [ ] Step title (cost: 10, assignee: coder)\n" + + " 1. Ordered step\n" + + " - Bullet step\n" + + " # Section heading\n" + + " Indented lines extend item descriptions.", + 1, + ); + } + + const strategyRaw = getFlagString(flags, "strategy") ?? "sequential"; + if (strategyRaw !== "sequential" && strategyRaw !== "parallel") { + throw new CliError("--strategy must be 'sequential' or 'parallel'", 1); + } + const strategy = strategyRaw as "sequential" | "parallel"; + + // Parse the plan + const parsedItems = parsePlan(planText); + if (parsedItems.length === 0) { + throw new CliError( + "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.", + 1, + ); } + + // Fetch task to determine remaining budget + const task = await getTask(taskId); + const costObj = asRecord(task["cost"]); + const budgetRemaining = costObj + ? (asNumber(costObj["allocated"]) ?? 0) + - (asNumber(costObj["consumed"]) ?? 0) + - (asNumber(costObj["childAllocated"]) ?? 0) + + (asNumber(costObj["childRecovered"]) ?? 0) + : 0; + + // Allocate costs + const allocation = allocateCosts(parsedItems, budgetRemaining); + if (!allocation.ok) { + throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1); + } + + // Build children array + const children = allocation.items.map((item) => { + const child: Record = { + title: item.title, + description: item.description, + costAllocation: item.cost, + }; + if (item.assignee) child["assignee"] = item.assignee; + if (item.reviewer) child["reviewer"] = item.reviewer; + if (item.skipAnalysis) child["skipAnalysis"] = true; + return child; + }); + + const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, { + children, + coordinationMode: strategy, + }); + + if (jsonMode) { + process.stdout.write(JSON.stringify(response, null, 2) + "\n"); + return; + } + + process.stdout.write(`--- Plan materialized for T${taskId} ---\n\n`); + const responseChildren = asArray(response["children"]) + .map((c) => asRecord(c)) + .filter((c): c is Record => c !== null); + process.stdout.write(`Created ${responseChildren.length} child tasks:\n`); + for (const child of responseChildren) { + process.stdout.write( + ` T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")} ${formatMoney(child["costAllocation"])}\n`, + ); + } + process.stdout.write("\nParent task is now a coordinator. Children will execute the plan.\n"); + + const planAgentId = process.env["TASKCORE_AGENT_ID"]; + if (planAgentId) clearActiveTask(planAgentId); } function ensureContextWithTaskId(taskId: string): TaskContext { @@ -2606,6 +2824,27 @@ const subcommandHelp: Record = { " decompose Task will be split into subtasks", ].join("\n"), + plan: [ + "task plan | --file | --stdin", + "", + "One-shot plan-based decomposition. Parses a markdown plan into", + "child tasks and materializes them under the current task.", + "Equivalent to `task decompose plan` but accepts plan as a positional arg.", + "", + "Options:", + " --file Read plan from file", + " --stdin Read plan from stdin", + " --strategy sequential|parallel Coordination mode (default: sequential)", + "", + "Plan format:", + " - [ ] Step title (cost: 10, assignee: coder)", + " 1. Ordered step", + " - Bullet step", + " # Section heading — prefixes following items with 'Section: '", + " Indented lines (2+ spaces) extend the item description.", + " Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis", + ].join("\n"), + decompose: [ "task decompose ...", "", @@ -2620,6 +2859,19 @@ const subcommandHelp: Record = { " --skip-analysis Skip analysis phase for child", " commit Commit the decomposition", " cancel Cancel pending decomposition", + " plan One-shot plan-based decomposition", + " --items Inline plan text", + " --file Read plan from file", + " --stdin Read plan from stdin", + " --strategy sequential|parallel Coordination mode (default: sequential)", + "", + "Plan format (for 'plan' subcommand):", + " - [ ] Step title (cost: 10, assignee: coder)", + " 1. Ordered step", + " - Bullet step", + " # Section heading — prefixes following items with 'Section: '", + " Indented lines (2+ spaces) extend the item description.", + " Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis", ].join("\n"), review: [ @@ -2799,6 +3051,9 @@ async function run(argv: string[]): Promise { case "decide": await cmdDecide(rest, jsonMode); return; + case "plan": + await cmdPlan(rest, jsonMode); + return; case "decompose": await cmdDecompose(rest, jsonMode); return; diff --git a/core/clock.ts b/core/clock.ts index f77ff86..4809ff5 100644 --- a/core/clock.ts +++ b/core/clock.ts @@ -8,6 +8,7 @@ import { type Event, type LeaseExpired, type PhaseTransition, + type RetryScheduled, type SystemState, type Task, type TaskExhausted, @@ -18,6 +19,14 @@ function backoffDue(task: Task, now: number): boolean { return task.condition === "retryWait" && task.retryAfter !== null && task.retryAfter <= now; } +function hasActiveAgent(task: Task): boolean { + return typeof task.leasedTo === "string" && task.leasedTo.trim().length > 0; +} + +function malformedActiveLease(task: Task): boolean { + return task.condition === "active" && (!hasActiveAgent(task) || task.leaseExpiresAt === null); +} + function childrenCompleteReady(state: SystemState, task: Task): { allTerminal: boolean; anyDone: boolean } { if (task.children.length === 0) { return { allTerminal: false, anyDone: false }; @@ -39,6 +48,7 @@ function childrenCompleteReady(state: SystemState, task: Task): { allTerminal: b export class CoreClock { private readonly sourceId: string; + private static readonly LEAKED_LEASE_BACKOFF_MS = 1_000; public constructor(sourceId = "core-clock") { this.sourceId = sourceId; @@ -103,11 +113,22 @@ export class CoreClock { } } - if ( - task.condition === "active" && - task.leaseExpiresAt !== null && - task.leaseExpiresAt <= now - ) { + if (malformedActiveLease(task)) { + const retryScheduled: RetryScheduled = { + type: "RetryScheduled", + taskId: task.id, + ts: now, + fenceToken: task.currentFenceToken, + reason: "orphaned_on_restart", + retryAfter: now + CoreClock.LEAKED_LEASE_BACKOFF_MS, + phase: task.phase, + attemptNumber: Math.max(1, task.attempts[task.phase].used), + }; + due.push(retryScheduled); + continue; + } + + if (task.condition === "active" && task.leaseExpiresAt !== null && task.leaseExpiresAt <= now) { const leaseExpired: LeaseExpired = { type: "LeaseExpired", taskId: task.id, diff --git a/core/test/cli-plan.test.ts b/core/test/cli-plan.test.ts new file mode 100644 index 0000000..4d50269 --- /dev/null +++ b/core/test/cli-plan.test.ts @@ -0,0 +1,221 @@ +import * as http from "node:http"; +import { spawnSync } from "node:child_process"; +import { beforeEach, afterEach, test } from "node:test"; +import * as assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { OrchestrationCore } from "../index.js"; +import { createHttpServer } from "../../middle/http.js"; +import { loadConfig, type Config } from "../../middle/config.js"; +import { initJournalRepo } from "../../middle/journal.js"; + +let server: http.Server; +let core: OrchestrationCore; +let tmpDir: string; +let port: number; +let config: Config; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../.."); + +function request( + method: string, + urlPath: string, + body?: unknown, +): Promise<{ status: number; body: unknown }> { + return new Promise((resolve, reject) => { + const data = body ? JSON.stringify(body) : undefined; + const req = http.request( + { + hostname: "127.0.0.1", + port, + path: urlPath, + method, + headers: data + ? { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + } + : {}, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (chunk: Buffer) => chunks.push(chunk)); + res.on("end", () => { + const raw = Buffer.concat(chunks).toString("utf-8"); + try { + resolve({ status: res.statusCode ?? 500, body: JSON.parse(raw) }); + } catch { + resolve({ status: res.statusCode ?? 500, body: raw }); + } + }); + }, + ); + req.on("error", reject); + if (data) req.write(data); + req.end(); + }); +} + +async function setup(): Promise { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-cli-plan-")); + const dbPath = path.join(tmpDir, "test.db"); + port = 19800 + Math.floor(Math.random() * 1000); + const journalRepoPath = path.join(tmpDir, "journal"); + const worktreeBaseDir = path.join(tmpDir, "worktrees"); + const workspaceDir = path.join(tmpDir, "workspace"); + const agentRegistry = path.join(tmpDir, "registry.json"); + + fs.mkdirSync(workspaceDir, { recursive: true }); + fs.mkdirSync(path.join(workspaceDir, "data"), { recursive: true }); + fs.writeFileSync(agentRegistry, JSON.stringify({ + agents: [ + { id: "coder", assignable: true, reviewer: true, consulted: true }, + { id: "overseer", assignable: true, reviewer: true, consulted: true }, + ], + }, null, 2)); + initJournalRepo(journalRepoPath); + + core = new OrchestrationCore({ + dbPath, + invariantChecks: true, + snapshotEvery: 50, + }); + + config = { + ...loadConfig(), + port, + dbPath, + agentRegistry, + workspaceDir, + journalRepoPath, + worktreeBaseDir, + runtimeFile: "", + }; + + server = createHttpServer(core, config); + await new Promise((resolve) => { + server.listen(port, "127.0.0.1", resolve); + }); +} + +async function teardown(): Promise { + await new Promise((resolve) => server.close(() => resolve())); + core.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +} + +beforeEach(setup); +afterEach(teardown); + +test("task plan materializes a markdown checklist into child tasks end-to-end", async () => { + const createRes = await request("POST", "/tasks", { + title: "Plan import parent", + description: "Verify task plan end-to-end", + assignee: "coder", + costBudget: 25, + }); + assert.equal(createRes.status, 201); + + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const planPath = path.join(tmpDir, "plan.md"); + fs.writeFileSync(planPath, [ + "# Planning", + "- [ ] Parse markdown checklist (cost: 5, assignee: coder)", + " Capture headings and metadata.", + "- [ ] Materialize children (reviewer: overseer)", + " Verify the parent/child shape in taskcore/dashboard.", + ].join("\n")); + + const cli = spawnSync( + process.execPath, + ["--import", "tsx", "core/cli/task.ts", "plan", "--file", planPath], + { + cwd: repoRoot, + env: { + ...process.env, + ORCHESTRATOR_PORT: String(port), + TASKCORE_AGENT_ID: "coder", + TASK_ID: "1", + }, + encoding: "utf-8", + }, + ); + + assert.equal(cli.status, 0, `stdout:\n${cli.stdout}\n\nstderr:\n${cli.stderr}`); + assert.match(cli.stdout, /Plan materialized for T1/); + assert.match(cli.stdout, /Created 2 child tasks/); + + const parentRes = await request("GET", "/tasks/1"); + assert.equal(parentRes.status, 200); + const parent = (parentRes.body as { + task: { + phase: string; + condition: string; + children: string[]; + coordination?: { mode?: string }; + }; + }).task; + + assert.equal(parent.phase, "analysis"); + assert.equal(parent.condition, "waiting"); + assert.deepEqual(parent.children, ["2", "3"]); + assert.equal(parent.coordination?.mode, "sequential_children"); + + const childrenRes = await request("GET", "/tasks?parentId=1&full=true"); + assert.equal(childrenRes.status, 200); + const children = (childrenRes.body as { + tasks: Array<{ + id: string; + title: string; + description: string; + phase: string; + condition: string; + metadata: { assignee?: string; reviewer?: string }; + cost: { allocated: number }; + }>; + }).tasks; + + assert.equal(children.length, 2); + assert.deepEqual( + children.map((child) => ({ + id: child.id, + title: child.title, + description: child.description, + phase: child.phase, + condition: child.condition, + assignee: child.metadata.assignee, + reviewer: child.metadata.reviewer, + allocated: child.cost.allocated, + })), + [ + { + id: "2", + title: "Planning: Parse markdown checklist", + description: "Capture headings and metadata.", + phase: "analysis", + condition: "ready", + assignee: "coder", + reviewer: undefined, + allocated: 5, + }, + { + id: "3", + title: "Planning: Materialize children", + description: "Verify the parent/child shape in taskcore/dashboard.", + phase: "analysis", + condition: "waiting", + assignee: undefined, + reviewer: "overseer", + allocated: 20, + }, + ], + ); +}); diff --git a/core/test/plan-parse.test.ts b/core/test/plan-parse.test.ts new file mode 100644 index 0000000..e1e4543 --- /dev/null +++ b/core/test/plan-parse.test.ts @@ -0,0 +1,269 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { allocateCosts, parsePlan } from "../cli/plan-parse.js"; + +// --------------------------------------------------------------------------- +// Parser tests +// --------------------------------------------------------------------------- + +test("parsePlan: checklist items", () => { + const items = parsePlan("- [ ] First step\n- [ ] Second step"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "First step"); + assert.equal(items[1]!.title, "Second step"); +}); + +test("parsePlan: checked and unchecked checklist both parse", () => { + const items = parsePlan("- [ ] Pending\n- [x] Done\n- [X] Also done"); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Pending"); + assert.equal(items[1]!.title, "Done"); + assert.equal(items[2]!.title, "Also done"); +}); + +test("parsePlan: ordered list items", () => { + const items = parsePlan("1. Alpha\n2. Beta\n3. Gamma"); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Alpha"); + assert.equal(items[1]!.title, "Beta"); + assert.equal(items[2]!.title, "Gamma"); +}); + +test("parsePlan: plain bullet items", () => { + const items = parsePlan("- Do thing A\n- Do thing B"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "Do thing A"); + assert.equal(items[1]!.title, "Do thing B"); +}); + +test("parsePlan: headings prefix subsequent items", () => { + const items = parsePlan("# Setup\n- Install deps\n# Cleanup\n- Remove temp files"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "Setup: Install deps"); + assert.equal(items[1]!.title, "Cleanup: Remove temp files"); +}); + +test("parsePlan: heading context resets on new heading", () => { + const items = parsePlan("# Phase 1\n- Item A\n# Phase 2\n- Item B"); + assert.equal(items[0]!.title, "Phase 1: Item A"); + assert.equal(items[1]!.title, "Phase 2: Item B"); +}); + +test("parsePlan: no heading means no prefix", () => { + const items = parsePlan("- Plain item"); + assert.equal(items[0]!.title, "Plain item"); +}); + +test("parsePlan: indented continuation lines become description", () => { + const items = parsePlan( + "- Do the thing\n With extra context here\n And more details", + ); + assert.equal(items.length, 1); + assert.equal(items[0]!.title, "Do the thing"); + assert.equal(items[0]!.description, "With extra context here\nAnd more details"); +}); + +test("parsePlan: description defaults to item text when no continuations", () => { + const items = parsePlan("- Simple item"); + assert.equal(items[0]!.description, "Simple item"); +}); + +test("parsePlan: blank lines between items are ignored", () => { + const items = parsePlan("- First\n\n- Second\n\n- Third"); + assert.equal(items.length, 3); +}); + +test("parsePlan: unrecognised prose flushes the pending item", () => { + const items = parsePlan([ + "- First step", + "This paragraph should not become a continuation line", + "- Second step", + ].join("\n")); + + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "First step"); + assert.equal(items[0]!.description, "First step"); + assert.equal(items[1]!.title, "Second step"); +}); + +test("parsePlan: inline cost metadata", () => { + const items = parsePlan("- Do something (cost: 15)"); + assert.equal(items[0]!.title, "Do something"); + assert.equal(items[0]!.cost, 15); +}); + +test("parsePlan: inline assignee and reviewer", () => { + const items = parsePlan("- Do something (assignee: coder, reviewer: overseer)"); + assert.equal(items[0]!.assignee, "coder"); + assert.equal(items[0]!.reviewer, "overseer"); +}); + +test("parsePlan: skip-analysis flag true", () => { + const items = parsePlan("- Trivial task (skip-analysis: true)"); + assert.equal(items[0]!.skipAnalysis, true); +}); + +test("parsePlan: skip-analysis flag false", () => { + const items = parsePlan("- Normal task (skip-analysis: false)"); + assert.equal(items[0]!.skipAnalysis, false); +}); + +test("parsePlan: combined metadata", () => { + const items = parsePlan( + "- Full item (cost: 20, assignee: coder, reviewer: lead, skip-analysis: true)", + ); + assert.equal(items[0]!.title, "Full item"); + assert.equal(items[0]!.cost, 20); + assert.equal(items[0]!.assignee, "coder"); + assert.equal(items[0]!.reviewer, "lead"); + assert.equal(items[0]!.skipAnalysis, true); +}); + +test("parsePlan: metadata stripped from title", () => { + const items = parsePlan("- Fix the bug (cost: 5)"); + assert.equal(items[0]!.title, "Fix the bug"); + assert.equal(items[0]!.cost, 5); +}); + +test("parsePlan: heading prefix not applied to description", () => { + const items = parsePlan("# Phase\n- Task title"); + assert.equal(items[0]!.title, "Phase: Task title"); + assert.equal(items[0]!.description, "Task title"); // description has no heading prefix +}); + +test("parsePlan: mixed formats in one plan", () => { + const plan = [ + "# Bootstrap", + "- [ ] Install dependencies (cost: 5)", + "1. Configure environment (cost: 10, assignee: infra)", + "# Feature", + "- Build the thing", + " Detailed instructions here", + ].join("\n"); + + const items = parsePlan(plan); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Bootstrap: Install dependencies"); + assert.equal(items[0]!.cost, 5); + assert.equal(items[1]!.title, "Bootstrap: Configure environment"); + assert.equal(items[1]!.cost, 10); + assert.equal(items[1]!.assignee, "infra"); + assert.equal(items[2]!.title, "Feature: Build the thing"); + assert.equal(items[2]!.description, "Detailed instructions here"); +}); + +// --------------------------------------------------------------------------- +// Cost allocation tests +// --------------------------------------------------------------------------- + +test("allocateCosts: errors on empty items", () => { + const result = allocateCosts([], 100); + assert.equal(result.ok, false); +}); + +test("allocateCosts: errors on zero budget", () => { + const items = parsePlan("- Step"); + const result = allocateCosts(items, 0); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no positive budget/); +}); + +test("allocateCosts: errors on negative budget", () => { + const items = parsePlan("- Step"); + const result = allocateCosts(items, -5); + assert.equal(result.ok, false); +}); + +test("allocateCosts: uses explicit costs as-is", () => { + const items = parsePlan("- Step A (cost: 10)\n- Step B (cost: 20)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); + assert.equal(result.items[1]!.cost, 20); +}); + +test("allocateCosts: distributes remaining budget evenly", () => { + const items = parsePlan("- A\n- B\n- C"); + const result = allocateCosts(items, 30); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); + assert.equal(result.items[1]!.cost, 10); + assert.equal(result.items[2]!.cost, 10); +}); + +test("allocateCosts: distributes remainder cents to first items", () => { + // $10.01 across 3 items → 3.34, 3.34, 3.33 (1 extra cent to first two) + const items = parsePlan("- A\n- B\n- C"); + const result = allocateCosts(items, 10.01); + assert.equal(result.ok, true); + if (!result.ok) return; + const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0); + assert.equal(totalCents, 1001); + assert.equal(result.items[0]!.cost, 3.34); + assert.equal(result.items[1]!.cost, 3.34); + assert.equal(result.items[2]!.cost, 3.33); +}); + +test("allocateCosts: mixes explicit and auto-distributed costs", () => { + const items = parsePlan("- A (cost: 10)\n- B\n- C"); + const result = allocateCosts(items, 30); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); // explicit + assert.equal(result.items[1]!.cost, 10); // (30 - 10) / 2 + assert.equal(result.items[2]!.cost, 10); +}); + +test("allocateCosts: total equals budget when all auto", () => { + const items = parsePlan("- A\n- B"); + const result = allocateCosts(items, 50); + assert.equal(result.ok, true); + if (!result.ok) return; + const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0); + assert.equal(totalCents, 5000); +}); + +test("allocateCosts: errors if explicit costs exceed budget", () => { + const items = parsePlan("- A (cost: 60)\n- B (cost: 50)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /exceed/); +}); + +test("allocateCosts: errors if no budget left for unspecified items", () => { + const items = parsePlan("- A (cost: 100)\n- B"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no budget/); +}); + +test("allocateCosts: preserves assignee and reviewer on items", () => { + const items = parsePlan("- Task (cost: 10, assignee: coder, reviewer: lead)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.assignee, "coder"); + assert.equal(result.items[0]!.reviewer, "lead"); +}); + +test("allocateCosts: preserves skipAnalysis flag", () => { + const items = parsePlan("- Task (cost: 10, skip-analysis: true)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.skipAnalysis, true); +}); + +test("allocateCosts: single item gets full budget when no cost specified", () => { + const items = parsePlan("- Single task"); + const result = allocateCosts(items, 25); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 25); +}); diff --git a/core/test/scenarios.test.ts b/core/test/scenarios.test.ts index 1ac82a1..a585171 100644 --- a/core/test/scenarios.test.ts +++ b/core/test/scenarios.test.ts @@ -1405,6 +1405,49 @@ test("Scenario S: LeaseExpired auto-emission transitions leased task to retryWai assert.equal(state.tasks.T1800?.leasedTo, null); }); +test("Scenario S2: malformed active task with missing lease is retried by the clock", () => { + let state = createInitialState(); + state = mustReduce(state, createTask("T1801", 1)); + state = mustReduce(state, lease("T1801", 2, 1, "analysis", "analyst", "a-1801")); + + state.tasks.T1801!.leaseExpiresAt = null; + + const clock = new CoreClock(); + const due = clock.collectDueEvents(state, 10_000); + const retryScheduled = due.find((event) => event.type === "RetryScheduled" && event.taskId === "T1801"); + assert.ok(retryScheduled); + if (!retryScheduled || retryScheduled.type !== "RetryScheduled") { + assert.fail("expected RetryScheduled"); + } + assert.equal(retryScheduled.reason, "orphaned_on_restart"); + + state = mustReduce(state, retryScheduled); + assert.equal(state.tasks.T1801?.condition, "retryWait"); + assert.equal(state.tasks.T1801?.leasedTo, null); + assert.equal(state.tasks.T1801?.leaseExpiresAt, null); +}); + +test("Scenario S3: malformed active task with empty agent is retried by the clock", () => { + let state = createInitialState(); + state = mustReduce(state, createTask("T1802", 1)); + state = mustReduce(state, lease("T1802", 2, 1, "analysis", "analyst", "a-1802")); + + state.tasks.T1802!.leasedTo = ""; + + const clock = new CoreClock(); + const due = clock.collectDueEvents(state, 10_000); + const retryScheduled = due.find((event) => event.type === "RetryScheduled" && event.taskId === "T1802"); + assert.ok(retryScheduled); + if (!retryScheduled || retryScheduled.type !== "RetryScheduled") { + assert.fail("expected RetryScheduled"); + } + assert.equal(retryScheduled.reason, "orphaned_on_restart"); + + state = mustReduce(state, retryScheduled); + assert.equal(state.tasks.T1802?.condition, "retryWait"); + assert.equal(state.tasks.T1802?.leasedTo, null); +}); + test("Scenario T: WaitResolved(block) blocks task and propagates summary to parent", () => { let state = createInitialState(); state = mustReduce(state, createTask("T1900", 1)); diff --git a/core/test/validator.test.ts b/core/test/validator.test.ts index eb27d25..4638cc9 100644 --- a/core/test/validator.test.ts +++ b/core/test/validator.test.ts @@ -63,6 +63,34 @@ test("validator rejects non-monotonic fence token", () => { assert.equal(error.code, "fence_not_monotonic"); }); +test("validator rejects LeaseGranted with empty agent id", () => { + const state = bootstrapState(); + + const invalidLease: Event = { + type: "LeaseGranted", + taskId: "T10", + ts: 2, + fenceToken: 1, + agentId: " ", + phase: "analysis", + leaseTimeout: 60_000, + sessionId: "sess-1", + sessionType: "fresh", + contextBudget: 512, + agentContext: { + sessionId: "sess-1", + agentId: " ", + memoryRef: null, + contextTokens: null, + modelId: "test", + }, + }; + + const error = validateEvent(state, invalidLease); + assert.ok(error); + assert.equal(error.code, "invalid_agent_id"); +}); + test("validator enforces failure summary on TaskFailed", () => { const state = bootstrapState(); diff --git a/core/validator.ts b/core/validator.ts index 5c6d4fd..eb7ec6f 100644 --- a/core/validator.ts +++ b/core/validator.ts @@ -321,6 +321,14 @@ function validateSessionPolicy( return null; } +function validateLeaseAgent(event: Extract): ValidationError | null { + if (!nonEmptyText(event.agentId)) { + return mkError(event, "invalid_agent_id", "LeaseGranted.agentId must be non-empty."); + } + + return null; +} + function validateWaitAction(event: Extract): ValidationError | null { if (event.action === "block" && !validFailureSummary(event.summary)) { return mkError( @@ -493,6 +501,11 @@ export function validateEvent(state: SystemState, event: Event): ValidationError return mkError(event, "invalid_context_budget", "Context budget must be a positive integer."); } + const agentError = validateLeaseAgent(event); + if (agentError) { + return agentError; + } + const attempt = task.attempts[event.phase]; if (attempt.used >= attempt.max) { return mkError(event, "attempt_budget_exhausted", "Attempt budget exhausted for current phase.", { diff --git a/docs/ops/t1710-capability-first-orchestration-hardening-plan.md b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md new file mode 100644 index 0000000..699d221 --- /dev/null +++ b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md @@ -0,0 +1,159 @@ +# T1710 — Capability-First Orchestration Hardening + +## Executive recommendation + +This program is **too large and cross-cutting to execute as a single task**. It should be decomposed into ordered child tasks with one parent artifact that fixes the architecture, sequencing, and acceptance criteria. + +The failure pattern is not "one bug". It is a systems problem caused by trying to decompose and execute uncertain missions **before** the system knows: +- what capabilities are actually available, +- which prerequisites are missing, +- which steps are reversible vs. irreversible, +- how partial progress should be preserved, +- when repeated failure should trigger a strategy change instead of more retries. + +## Problem framing + +The target workflow class has five characteristics: +1. **High uncertainty at start** — entity matching, environment state, or account status may be unknown. +2. **Browser-mediated execution** — progress depends on live UI state, auth, and fragile selectors. +3. **Partially irreversible actions** — clicks, submissions, trades, messages, or confirmations may have consequences. +4. **Mixed research + execution** — discovery work is often bundled with deterministic action steps. +5. **Infrastructure noise** — browser relay, auth state, tool health, and mutation-path failures create false task churn. + +If the orchestrator treats these as ordinary deterministic tasks, it creates the same failure loop: +- decompose too early, +- assign execution before readiness, +- lose partial findings in review/aggregate handoffs, +- retry the same failing path, +- escalate risk near irreversible steps. + +## Strategic design principles + +### 1. Capability-first before decomposition +Before generating child tasks, the system should produce a **mission capability snapshot**: +- available tools and runtimes, +- authenticated systems/accounts, +- browser availability and attachment state, +- permission constraints, +- verification channels, +- human-approval requirements, +- known blockers. + +If readiness is low, the system should create **prerequisite tasks** first, not execution tasks. + +### 2. Separate discovery from deterministic execution +A mission should not begin with an execution plan when the main unknown is still identification, feasibility, auth, or state verification. + +Use two lanes: +- **Discovery lane**: identify entities, inspect environment, map options, and gather evidence. +- **Execution lane**: perform deterministic, validated steps only after inputs are stable. + +This preserves operator clarity and reduces bogus "execution failures" that are really unresolved discovery problems. + +### 3. Preserve partial progress as first-class artifacts +When a child task uncovers verified facts but cannot finish the end-to-end mission, that output must survive reviews, retries, and replanning. + +Required artifact types: +- capability snapshots, +- matched entities / rejected candidates, +- prerequisite checklist state, +- evidence bundles, +- environment fingerprints, +- execution-ready plans, +- approval packets for irreversible actions. + +The parent should aggregate these artifacts instead of forcing children into a binary success/fail shape. + +### 4. Fingerprint failure modes, then switch strategy +Repeated retries are only rational if the failure mode is transient. The orchestrator should classify failures into buckets such as: +- auth missing/expired, +- browser relay unattached, +- selector / UI drift, +- external system ambiguity, +- runtime/tool unavailable, +- mutation accepted but verification unavailable, +- approval required but not granted. + +Each class needs a defined next action: retry, reroute, decompose prerequisite, request approval, or stop. + +### 5. Approval-gated lane for irreversible actions +Irreversible or safety-sensitive actions should require a specific lane with: +- explicit action summary, +- preconditions satisfied, +- target/entity verified, +- rollback possibilities documented, +- approval token or human confirmation captured, +- post-action verification defined. + +This should not share the same semantics as low-risk research tasks. + +## Proposed decomposition + +### Child 1 — Mission capability registry and readiness model +**Goal:** define machine-readable representation of capabilities, prerequisites, and readiness scoring. +**Output:** schema + readiness levels + examples + integration points. + +### Child 2 — Execution preflight and prerequisite detection +**Goal:** build the gate that runs before decomposition/execution to detect missing auth, tools, browser state, permissions, and verification channels. +**Output:** preflight rules, fail-fast decisions, prerequisite task generation rules. + +### Child 3 — Grounding and uncertain-entity evaluation framework +**Goal:** handle missions where the target entity, account, page, or record is uncertain. +**Output:** match confidence model, evidence requirements, safe stopping conditions. + +### Child 4 — Failure fingerprinting and strategy switching +**Goal:** stop naive retry loops and route repeated failures to the right next strategy. +**Output:** failure taxonomy, retry budgets, switching rules, observability requirements. +**Design artifact:** `docs/ops/t1712-failure-fingerprinting-strategy-switching.md` + +### Child 5 — Approval-gated irreversible-action lane +**Goal:** create a separate workflow for steps with material consequences. +**Output:** approval packet schema, gate conditions, execution/verification semantics. + +### Child 6 — Aggregate policy and artifact-first closure semantics +**Goal:** preserve partial progress through review and parent aggregation. +**Output:** child completion semantics, artifact contract, parent merge rules, review checklist. + +### Child 7 — Cross-repo implementation and validation plan +**Goal:** map where changes belong across taskcore and colony, sequence rollout, and define tests. +**Output:** implementation order, repo ownership, migration plan, acceptance tests. + +## Ordering recommendation + +Recommended execution order: +1. Capability registry and readiness model +2. Execution preflight and prerequisite detection +3. Grounding / uncertain-entity evaluation +4. Failure fingerprinting and strategy switching +5. Approval-gated lane for irreversible actions +6. Aggregate policy and artifact-first closure semantics +7. Cross-repo implementation and validation plan + +Rationale: +- readiness and preflight are foundation layers, +- grounding determines whether execution should even begin, +- failure handling is only useful after readiness semantics exist, +- approval gating depends on stable preconditions and verification semantics, +- aggregate closure should be shaped after artifact types are defined, +- implementation planning should be last so it reflects the final architecture. + +## Acceptance criteria for the parent task + +T1710 should only be considered complete when it produces: +- a parent architecture memo with the final system model, +- child tasks covering all six functional areas plus rollout/validation, +- explicit artifact contracts between children, +- a recommended order of implementation, +- concrete acceptance tests for the integrated workflow. + +## What not to do + +- Do **not** patch a single historical workflow. +- Do **not** encode browser-specific hacks as general orchestration policy. +- Do **not** collapse discovery, execution, and approval into one task type. +- Do **not** use success/failure alone as the parent aggregation model. +- Do **not** allow irreversible execution without explicit preconditions and approval semantics. + +## Immediate next move + +Decompose T1710 into the ordered child tasks above, using domain-agnostic language and artifact-focused outputs. The parent remains responsible for the integrated architecture and rollout sequence. diff --git a/docs/ops/t1712-failure-fingerprinting-strategy-switching.md b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md new file mode 100644 index 0000000..576c279 --- /dev/null +++ b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md @@ -0,0 +1,449 @@ +# T1712 — Failure fingerprinting, strategy switching, and dynamic retry budgets + +## Problem statement + +TaskCore currently treats most agent failures as variations of the same event: +- `task-executor.mjs` increments a single `retryCount` +- work is re-queued with generic exponential backoff +- only rate limits get a distinct path +- once `MAX_RETRIES` is exhausted, the task is simply blocked + +That is too lossy for uncertain, browser-mediated, or dependency-heavy work. The system cannot distinguish: +- a stale login that should wake a shared auth blocker +- anti-bot / access denial that should halt sibling work on the same target +- missing inputs that should create a prerequisite task instead of more retries +- invalid state transitions that require replanning, not repetition +- verification failures after a mutation, where autonomy should stop and escalate +- repeated cost / provider exhaustion, where dispatch should shift strategy globally + +The result is sibling churn: multiple leaves burn retries on the same blocker even when the next rational move is shared blocker-removal or mission replanning. + +--- + +## Design goals + +1. **Canonical failure identity** — repeated failures with the same operative cause produce the same fingerprint. +2. **Scope-aware routing** — leaf-local failures stay local; shared blockers fan in to one prerequisite task; global exhaustion triggers broader throttling. +3. **Strategy switching over blind retries** — retry only when the failure class is plausibly transient. +4. **Task-kind-aware budgets** — execution, aggregate, and artifact-only tasks do not share the same retry policy. +5. **Inspectable decisions** — every retry, pause, reroute, and blocker promotion is visible in task metadata and executor outcome logs. +6. **Fail-closed near irreversible work** — verification and state-transition failures on execution tasks escalate before more automation is attempted. + +### Non-goals + +- Replacing provider allocation gating from T755. +- Replacing the recovery breaker engine for host/service remediation. +- Solving target grounding/entity ambiguity (that belongs to T1704/T2110-style grounding work). + +--- + +## Where this policy plugs in + +Primary integration points: +1. **`scripts/task-executor.mjs`** — authoritative classification, retry budgeting, and routing. +2. **task metadata in `.taskmaster/tasks/tasks.json`** — persistent fingerprint, counters, blocker linkage, and route decisions. +3. **executor outcome log** (`data/task-dashboard/executor_outcomes.jsonl`) — append fingerprint + routing evidence. +4. **dashboard export / APIs** — surface repeated failure clusters, promoted blockers, and retry-budget exhaustion. + +This task defines the contract and routing model. A later execution task should implement the plumbing. + +--- + +## 1) Canonical failure fingerprint model + +A **failure fingerprint** is the deduplicated identity of a task failure for orchestration purposes. + +### Proposed schema + +```json +{ + "version": "v1", + "fingerprintId": "ffp_01HV...", + "taskId": 1712, + "taskKind": "execution", + "runPhase": "work", + "failureClass": "auth_session_failure", + "scope": "shared_prerequisite", + "resourceKey": "telegram:account:primary", + "surface": "browser_relay", + "reasonCode": "session_expired", + "signature": { + "provider": "openai-codex", + "model": "gpt-5.4", + "exitType": "agent_crash", + "stderrClass": "login_required", + "targetRef": "telegram-web" + }, + "dedupeKey": "auth_session_failure|telegram:account:primary|browser_relay|session_expired", + "firstSeenAt": "2026-03-13T09:00:00Z", + "lastSeenAt": "2026-03-13T09:07:00Z", + "attempts": 3, + "affectedTaskIds": [1712, 1718, 1721], + "recommendedStrategy": "wake_or_create_blocker", + "recommendedBlockerKey": "blocker:auth:telegram:account:primary" +} +``` + +### Required fields + +| Field | Meaning | +|---|---| +| `taskKind` | `execution`, `aggregate`, `artifact_only`, `review`, `capability_probe`, etc. | +| `runPhase` | `work` or `review` | +| `failureClass` | canonical orchestrator-facing class | +| `scope` | `leaf_local`, `shared_prerequisite`, or `global_budget` | +| `resourceKey` | normalized target of the blocker (`browser:relay:chrome`, `human:kas`, `provider:openai-codex`) | +| `reasonCode` | finer-grained sub-cause | +| `dedupeKey` | stable routing key used to collapse repeats | +| `attempts` | count of repeated hits within policy window | +| `recommendedStrategy` | `retry_same_path`, `switch_strategy`, `wake_or_create_blocker`, `pause_for_review`, `global_throttle` | + +### Scope semantics + +- **`leaf_local`** — retry/replan only this task. Example: malformed prompt for one artifact-only task. +- **`shared_prerequisite`** — stop retrying sibling leaves and promote a shared blocker. Example: expired auth, missing credential, inaccessible website. +- **`global_budget`** — provider/cost saturation or broad platform outage; gate future dispatch and avoid local churn. + +--- + +## 2) Canonical failure classes + +These are the minimum classes required for T1712 acceptance. + +| Failure class | Typical signals | Default scope | Default strategy | +|---|---|---|---| +| `auth_session_failure` | login required, expired cookie/session, wallet disconnected, missing permission grant | `shared_prerequisite` | pause affected leaves, wake/create auth blocker | +| `access_denial_antibot` | captcha, 403, antibot page, WAF deny, account challenge | `shared_prerequisite` | stop automation path, request human unblock / alternate channel | +| `missing_input` | required ID/file/approval/parameter absent | `shared_prerequisite` if shared, else `leaf_local` | create prerequisite task or request user input | +| `invalid_state_transition` | task tries action from wrong state, precondition invalid, already-submitted/closed/cancelled | `leaf_local` or `shared_prerequisite` if state is mission-wide | replan from refreshed state; no same-path retry | +| `verification_failure` | mutation possibly happened but postcondition cannot be proven; conflicting checks | `leaf_local` on single leaf, fail-closed for execution | halt autonomy, require verification/review path | +| `cost_exhaustion_repeated` | provider denied for budget/quota reasons across attempts/windows | `global_budget` | throttle dispatch, shift provider/model/priority policy | + +Recommended additional classes for implementation completeness: +- `tool_runtime_unavailable` +- `ui_selector_drift` +- `rate_limit_transient` +- `dependency_blocked` +- `human_approval_missing` + +### Failure-class notes + +#### `auth_session_failure` +Examples: +- browser relay attached but session logged out +- API token missing or expired +- wallet connector present but no connected account + +Rule: after the second matching hit within the policy window, stop local retries and create/wake one shared auth blocker keyed by the affected account/resource. + +#### `access_denial_antibot` +Rule: never let sibling leaves keep probing the same blocked surface. Switch to a human-assisted or alternate-channel strategy immediately after first confirmed match. + +#### `missing_input` +Rule: if the missing input is shared by multiple children (e.g. target account id, approval token, attachment), collapse it into one prerequisite task and mark dependent leaves as waiting/blocked-by-dependency. + +#### `invalid_state_transition` +Rule: do not spend retry budget repeating an action against a stale assumption. Refresh state, then either replan or close as not-applicable. + +#### `verification_failure` +Rule: for execution tasks, verification failure after a mutation is **not retry-equivalent** to a normal crash. The system must stop and request human review or explicit verification work. + +#### `cost_exhaustion_repeated` +Rule: once the same quota/budget fingerprint repeats across tasks or time windows, it becomes a dispatch-policy problem, not a leaf problem. Route to global throttling or provider switch. + +--- + +## 3) Fingerprint derivation rules + +The executor should derive a fingerprint in four passes: + +1. **Normalize runtime facts** + - task kind + - run phase + - assignee/reviewer + - exit code / signal + - error tail classification + - known provider/model metadata + - target resource / surface + +2. **Assign failure class** + - use deterministic rule table before any model-based classifier + - allow only a bounded fallback classifier for unknown cases + +3. **Resolve routing scope** + - infer whether the cause is leaf-local, shared prerequisite, or global budget + +4. **Build dedupe key** + - `failureClass | resourceKey | surface | reasonCode` + - exclude volatile text (timestamps, raw stack traces, run ids) + +### Example derivations + +```text +stderr: "Telegram Web shows login required" +=> failureClass=auth_session_failure +=> resourceKey=telegram:web:primary +=> scope=shared_prerequisite +=> dedupeKey=auth_session_failure|telegram:web:primary|browser_relay|login_required +``` + +```text +stderr: "429 Too Many Requests from provider openai-codex" +=> failureClass=rate_limit_transient +=> resourceKey=provider:openai-codex +=> scope=leaf_local (single task) OR global_budget once repeated threshold trips +=> dedupeKey=rate_limit_transient|provider:openai-codex|dispatch|429 +``` + +```text +stderr: "proposal already published" +=> failureClass=invalid_state_transition +=> resourceKey=proposal:1234 +=> scope=shared_prerequisite if many leaves assume draft state +=> dedupeKey=invalid_state_transition|proposal:1234|mutation_path|already_published +``` + +--- + +## 4) Strategy-switching rules + +### Canonical strategies + +| Strategy | Use when | Result | +|---|---|---| +| `retry_same_path` | transient/local issue and retry budget remains | requeue same task with backoff | +| `switch_strategy` | same objective still valid but current path is irrational | reroute to alternate tool/channel/plan | +| `wake_or_create_blocker` | repeated shared blocker across leaves | create or wake one prerequisite task and pause dependents | +| `pause_for_review` | verification or safety-sensitive ambiguity | send to review / human confirmation | +| `global_throttle` | provider or budget exhaustion spans multiple tasks | deny/defer future dispatch until healthy | + +### Routing decision table + +| Failure class | First hit | Repeated hit | Exhausted state | +|---|---|---|---| +| `auth_session_failure` | retry once if evidence is weak; otherwise create blocker immediately | wake/create shared auth blocker; pause sibling leaves | mark dependency blocker and stop automation until prerequisite closes | +| `access_denial_antibot` | stop same-path retries; request alternate route | shared blocker + human review | quarantine target surface for cooldown window | +| `missing_input` | create/wake prerequisite or ask for input | collapse siblings onto same blocker | leave waiting on prerequisite, no further retries | +| `invalid_state_transition` | refresh state and re-evaluate | replan or close as superseded | no further retries on stale path | +| `verification_failure` | require explicit verification task / review | block execution lane on target | escalate to human with evidence bundle | +| `cost_exhaustion_repeated` | apply local defer/backoff | trigger provider/model/policy switch or dispatch gate deny | global throttle until healthy window returns | + +### Shared blocker promotion rule + +When all of the following hold, the executor promotes a blocker task: +1. `scope == shared_prerequisite` +2. same `dedupeKey` occurs on **>= 2 tasks** or **>= 2 attempts on one task** within the policy window +3. a blocker with the same `recommendedBlockerKey` is not already active + +Result: +- create or wake one blocker task +- attach `metadata.failureFingerprint.blockerTaskId` +- mark affected leaves as dependency-blocked / waiting on that blocker +- suppress additional same-fingerprint retries until blocker state changes + +### Replanning rule + +If a task hits `invalid_state_transition` or `verification_failure`, the next action should be a replan/verification step, not another leaf retry. The executor should either: +- create a child task for state refresh / verification, or +- send the task back to review with the fingerprint attached. + +--- + +## 5) Dynamic retry budget policy + +Retry budgets must be keyed by **task kind** and **failure class**, not one global `MAX_RETRIES`. + +### Policy table (recommended v1) + +| Task kind | Failure class | Auto retries | Backoff class | On exhaustion | +|---|---|---:|---|---| +| `execution` | `rate_limit_transient` | 2 | long | switch provider or defer via gate | +| `execution` | `auth_session_failure` | 1 | short | create/wake auth blocker | +| `execution` | `access_denial_antibot` | 0 | none | human unblock / alternate path | +| `execution` | `missing_input` | 0 | none | prerequisite task | +| `execution` | `invalid_state_transition` | 0 | none | refresh + replan | +| `execution` | `verification_failure` | 0 | none | review / verification task | +| `execution` | `tool_runtime_unavailable` | 1 | medium | reroute tool / pause | +| `aggregate` | `dependency_blocked` | 0 | none | wait for required children / blocker | +| `aggregate` | `missing_input` | 0 | none | request missing artifact coverage | +| `aggregate` | `tool_runtime_unavailable` | 1 | short | rerun reducer/export path | +| `aggregate` | `verification_failure` | 1 | short | review aggregate evidence | +| `artifact_only` | `tool_runtime_unavailable` | 2 | short | reroute agent/tool | +| `artifact_only` | `missing_input` | 0 | none | request source material | +| `artifact_only` | `invalid_state_transition` | 0 | none | usually close/supersede, not retry | +| `artifact_only` | `cost_exhaustion_repeated` | 1 | long | defer until budget recovers | +| `review` | `tool_runtime_unavailable` | 1 | short | reroute reviewer | +| `review` | `verification_failure` | 0 | none | escalate to human reviewer | + +### Why the policies differ + +- **Execution tasks** carry the most risk; most non-transient classes should not auto-retry. +- **Aggregate tasks** should rarely retry; repeated child blockers are usually dependency issues, not execution failures. +- **Artifact-only tasks** can tolerate slightly more retry on tooling failures because they do not directly mutate external state. + +### Budget accounting model + +Track two counters per task attempt window: +1. **`pathRetryCount`** — retries on the same strategy/path +2. **`strategySwitchCount`** — number of alternate paths already tried + +This prevents a task from escaping budget control by bouncing endlessly between weak alternatives. + +Recommended defaults: +- `execution`: `pathRetryCount <= 2`, `strategySwitchCount <= 1` +- `aggregate`: `pathRetryCount <= 1`, `strategySwitchCount <= 1` +- `artifact_only`: `pathRetryCount <= 2`, `strategySwitchCount <= 2` + +--- + +## 6) Proposed metadata contract + +Add the following metadata shape to task records: + +```json +{ + "metadata": { + "retryPolicy": { + "taskKind": "execution", + "pathRetryCount": 1, + "strategySwitchCount": 0, + "budgetWindow": "30m", + "lastBudgetDecision": "wake_or_create_blocker" + }, + "failureFingerprint": { + "fingerprintId": "ffp_01HV...", + "failureClass": "auth_session_failure", + "scope": "shared_prerequisite", + "dedupeKey": "auth_session_failure|telegram:web:primary|browser_relay|login_required", + "attemptsWindow": 2, + "firstSeenAt": "2026-03-13T09:00:00Z", + "lastSeenAt": "2026-03-13T09:07:00Z", + "recommendedStrategy": "wake_or_create_blocker", + "blockerTaskId": 1730 + }, + "blockedByFingerprint": true, + "sharedBlockerKey": "blocker:auth:telegram:web:primary" + } +} +``` + +### Executor outcome log extension + +Each `executor_outcomes.jsonl` record should append: +- `taskKind` +- `failureClass` +- `fingerprintDedupeKey` +- `routingDecision` +- `scope` +- `strategySwitched` (bool) +- `blockerTaskId` (if any) + +This is the minimum observability needed to prove sibling churn actually dropped after rollout. + +--- + +## 7) Shared blocker lifecycle + +### Blocker task creation contract + +When promoting shared blocker work, the system should create a task with: +- kind: `capability_probe` or `blocker_removal` +- title: deterministic and resource-based +- description: include fingerprint class, affected resource, and evidence bundle +- metadata: + - `blockerKey` + - `sourceFingerprint` + - `affectedTaskIds` + - `createdFromFailureRouter=true` + +### Wake vs create + +- **Wake existing blocker** when an active/pending blocker has the same `blockerKey`. +- **Create new blocker** only when no active blocker exists. + +### Leaf behavior while blocker active + +Affected leaves should not continue ordinary retry scheduling. Instead they should move to a dependency-held condition with: +- blocker task id +- blocker key +- fingerprint id +- timestamp of last routed match + +This is the mechanism that stops sibling retry burn. + +--- + +## 8) Observability and dashboards + +Required dashboard/reporting surfaces: +1. **Top repeated fingerprints** in the last 24h +2. **Shared blocker promotions** and number of leaves collapsed behind each blocker +3. **Retries avoided** after blocker promotion +4. **Failure-class breakdown by task kind** +5. **Global budget throttles** with provider/model linkage + +Key success metrics: +- drop in repeated identical failures per sibling set +- increase in blocker reuse rate +- reduction in tasks blocked only after max retries +- fewer execution tasks auto-retrying on verification failures + +--- + +## 9) Acceptance tests for the implementation task + +### A. Shared auth blocker +1. Two execution leaves fail with the same expired-session fingerprint. +2. System creates/wakes one auth blocker. +3. Sibling leaves stop consuming retries. +4. Both leaves reference the same blocker task id/key. + +### B. Anti-bot/access denial +1. First confirmed anti-bot fingerprint occurs on an execution task. +2. Executor does **not** requeue same path. +3. Alternate route or human unblock task is requested. + +### C. Missing shared input +1. Multiple tasks lack the same approval token / file. +2. One prerequisite task is created. +3. Additional failures attach to existing blocker instead of creating duplicates. + +### D. Invalid state transition +1. Execution task tries to mutate an already-finalized resource. +2. No same-path retry occurs. +3. Task routes to refresh/replan or closes as superseded. + +### E. Verification failure +1. Mutation step appears to succeed but verification check is inconclusive. +2. Task does not spend standard retry budget. +3. Task escalates to review/verification work. + +### F. Dynamic budgets by task kind +1. `artifact_only` task with tool outage gets >0 retries. +2. `aggregate` task with dependency blocker gets 0 same-path retries. +3. `execution` task with auth failure gets <=1 retry before blocker promotion. + +### G. Repeated cost exhaustion +1. Same provider quota exhaustion hits multiple tasks in window. +2. A global throttle / provider-switch path is activated. +3. Future dispatch is deferred rather than burning leaf retries. + +--- + +## 10) Implementation order recommendation + +1. Add fingerprint schema + deterministic classifier in executor. +2. Persist metadata and outcome-log extensions. +3. Add blocker promotion / wake semantics. +4. Add per-task-kind retry policy table. +5. Add dashboard aggregation + regression tests. + +This order ensures the system can first *see* repeated failure identity, then *route* it, then *enforce* differentiated budgets. + +--- + +## Bottom line + +T1712 should change TaskCore from **"every failure increments one retry counter"** to **"each failure class carries a scoped fingerprint and a bounded next strategy."** + +That is the key behavior change needed to stop repeated sibling churn, promote blocker-removal work when appropriate, and make retry policy depend on both **what failed** and **what kind of task failed**. diff --git a/docs/task-cli-spec.md b/docs/task-cli-spec.md index 661d03a..af2dfb8 100644 --- a/docs/task-cli-spec.md +++ b/docs/task-cli-spec.md @@ -510,6 +510,105 @@ Abort a pending decomposition session without committing. task decompose cancel ``` +#### `task plan` / `task decompose plan` — one-shot plan-based decomposition + +Submit a complete decomposition in one command from a structured plan text. +Uses the same `POST /tasks/:id/decompose` endpoint as a regular commit, so +children appear in the dashboard immediately. + +`task plan` is the low-friction shortcut for agents; `task decompose plan` is +its explicit decomposition-scoped equivalent. + +``` +task plan "" +task plan --file +task plan --stdin + [--strategy sequential|parallel] (default: sequential) + +# Equivalent forms +task decompose plan --items "" +task decompose plan --file +task decompose plan --stdin + [--strategy sequential|parallel] (default: sequential) +``` + +**Plan format:** + +```markdown +# Optional Section Heading +- [ ] Checklist step title (cost: 10, assignee: coder) +- [x] Already-checked step (cost: 5) +1. Ordered list step +- Plain bullet step + Indented lines (2+ spaces) become the child description. + Multiple continuation lines are joined with newlines. +``` + +Supported item types: checklist (`- [ ]`), ordered (`1.`), plain bullet (`-`). + +Headings (`#`, `##`, etc.) set a context prefix applied to all following items +until the next heading: `"Section: Step title"`. + +**Trailing metadata** (optional, at end of item line): + +``` +(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true) +``` + +Supported keys: `cost`, `assignee`, `reviewer`, `skip-analysis`. + +**Plan editing / propagation semantics (v1):** + +- The imported list order becomes the child task order. With the default + `sequential` strategy, that order also drives which child is ready first. +- Checkbox state in the source markdown is **not** treated as task completion. + Checked and unchecked items both import as child tasks; completion is tracked + by taskcore child state after materialization. +- Before materialization, edit the markdown and re-run `task plan`. +- After materialization, the child tasks become the durable source of truth + shown in taskcore/dashboard. If the remaining plan needs to change, create a + new decomposition version for the remaining work rather than mutating + completed child history in place. + +**Cost allocation:** + +- Items with explicit `cost` keep that value. +- Items without `cost` share the remaining budget evenly (integer-cent + arithmetic — no float drift). +- Errors clearly if explicit costs exceed the parent's remaining budget, or + if there is no budget left for uncosted items. + +**Example:** + +```markdown +# Infrastructure +- [ ] Provision database (cost: 15, assignee: infra) +- [ ] Configure networking (cost: 10, assignee: infra) + +# Application +- [ ] Implement API endpoints (assignee: coder, reviewer: lead) + Build REST endpoints for user CRUD operations. +- [ ] Write integration tests +``` + +``` +task plan --file plan.md --strategy sequential +``` + +**Output:** + +``` +--- Plan materialized for T42 --- + +Created 4 child tasks: + T101: Infrastructure: Provision database 15.00 + T102: Infrastructure: Configure networking 10.00 + T103: Application: Implement API endpoints 12.50 + T104: Application: Write integration tests 12.50 + +Parent task is now a coordinator. Children will execute the plan. +``` + ### 3.6 Review Workflow (guided, multi-step) When a task is in the `review` phase and claimed by a reviewer. diff --git a/middle/http.ts b/middle/http.ts index 2278a5f..6c471a3 100644 --- a/middle/http.ts +++ b/middle/http.ts @@ -712,11 +712,15 @@ function handleClaimTask( return { status: 409, body: { error: "terminal_task", message: `Task ${taskId} is terminal` } }; } - const agentId = typeof b.agentId === "string" && b.agentId.trim().length > 0 - ? b.agentId.trim() - : typeof b.agent === "string" && b.agent.trim().length > 0 - ? b.agent.trim() - : "unknown"; + const requestedAgentId = typeof b.agentId === "string" + ? b.agentId + : typeof b.agent === "string" + ? b.agent + : null; + if (requestedAgentId !== null && requestedAgentId.trim().length === 0) { + return { status: 400, body: { error: "invalid_agent_id", message: "agentId must be non-empty." } }; + } + const agentId = requestedAgentId?.trim() || "unknown"; const fenceToken = task.currentFenceToken + 1; const sessionId = crypto.randomUUID(); @@ -971,8 +975,11 @@ function handleListTasks( assignee: t.metadata["assignee"] ?? null, reviewer: t.metadata["reviewer"] ?? null, priority: t.metadata["priority"] ?? "medium", + activeAgent: t.leasedTo, + leaseExpiresAt: t.leaseExpiresAt, createdAt: t.createdAt, updatedAt: t.updatedAt, + updatedAtMs: t.updatedAt, })); return { status: 200, body: { tasks: summaries } }; @@ -2900,8 +2907,11 @@ function collectAttentionTasks(core: Core): { (t) => !t.terminal && t.condition === "active" && - t.leaseExpiresAt !== null && - t.leaseExpiresAt <= now, + ( + t.leaseExpiresAt === null || + (typeof t.leasedTo !== "string" || t.leasedTo.trim().length === 0) || + t.leaseExpiresAt <= now + ), ) .map(toSummary); diff --git a/middle/journal.ts b/middle/journal.ts index bb77768..d840023 100644 --- a/middle/journal.ts +++ b/middle/journal.ts @@ -255,6 +255,7 @@ function git(cwd: string, args: string[]): string { cwd, encoding: "utf-8", timeout: 10_000, + stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, GIT_TERMINAL_PROMPT: "0", diff --git a/middle/prompt.ts b/middle/prompt.ts index 99790ab..32e0c95 100644 --- a/middle/prompt.ts +++ b/middle/prompt.ts @@ -412,7 +412,8 @@ function appendDecompositionInstructions(sections: string[], task: Task, config: sections.push(""); sections.push("## How to Submit Your Decomposition"); sections.push(""); - sections.push("Use the incremental decompose CLI — it guides you step by step:"); + sections.push("Preferred when you already have a checklist: use the one-shot plan flow (`task plan --file plan.md` or `task decompose plan --stdin`)."); + sections.push("If you need to build children incrementally, use the step-by-step decompose flow below:"); sections.push(""); sections.push("```bash"); sections.push(`# Step 1: Start a decomposition session`); diff --git a/middle/test/http.test.ts b/middle/test/http.test.ts index f73e336..af87f5b 100644 --- a/middle/test/http.test.ts +++ b/middle/test/http.test.ts @@ -1,3 +1,4 @@ +import { execFileSync } from "node:child_process"; import * as http from "node:http"; import { test, describe, beforeEach, afterEach } from "node:test"; import * as assert from "node:assert/strict"; @@ -110,6 +111,16 @@ async function teardown(): Promise { } } +function initRepo(repoPath: string): void { + fs.mkdirSync(repoPath, { recursive: true }); + execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" }); + fs.writeFileSync(path.join(repoPath, "README.md"), "# test\n", "utf-8"); + execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" }); +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -209,6 +220,35 @@ describe("HTTP API", () => { assert.equal(body.tasks.length, 2); }); + test("GET /tasks summaries include active agent and lease metadata", async () => { + await request("POST", "/tasks", { + title: "Leased task", + description: "Should expose lease info in summary", + assignee: "coder", + }); + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const res = await request("GET", "/tasks"); + assert.equal(res.status, 200); + const body = res.body as { + tasks: Array<{ + id: string; + activeAgent: string | null; + leaseExpiresAt: number | null; + updatedAtMs: number | null; + }>; + }; + const task = body.tasks.find((entry) => entry.id === "1"); + assert.ok(task); + assert.equal(task.activeAgent, "coder"); + assert.equal(typeof task.leaseExpiresAt, "number"); + assert.equal(task.updatedAtMs !== null && task.updatedAtMs > 0, true); + }); + test("GET /dispatchable lists dispatchable tasks", async () => { await request("POST", "/tasks", { title: "Ready task", @@ -232,6 +272,21 @@ describe("HTTP API", () => { assert.equal(res.status, 404); }); + test("POST /tasks/:id/claim rejects blank agent id", async () => { + await request("POST", "/tasks", { + title: "Claim validation", + description: "Blank claim agent should fail", + }); + + const res = await request("POST", "/tasks/1/claim", { + agentId: " ", + source: "test", + }); + assert.equal(res.status, 400); + const body = res.body as { error: string }; + assert.equal(body.error, "invalid_agent_id"); + }); + test("PATCH /tasks/:id/metadata updates priority", async () => { await request("POST", "/tasks", { title: "Metadata test", @@ -462,6 +517,41 @@ describe("HTTP API", () => { assert.ok(readBody.content.includes("Starting implementation")); }); + test("claim falls back to repo HEAD when requested base branch is missing", async () => { + const repoPath = path.join(tmpDir, "code-repo"); + initRepo(repoPath); + + await request("POST", "/tasks", { + title: "Claim fallback test", + description: "Claim should still create a code worktree when base branch is missing", + assignee: "coder", + repo: repoPath, + baseBranch: "missing-base", + skipAnalysis: true, + }); + + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const claimBody = claimRes.body as { + workspace?: { codeWorktree?: string | null }; + warnings?: string[]; + }; + assert.ok(claimBody.workspace?.codeWorktree); + assert.deepEqual(claimBody.warnings ?? [], []); + + const codeWorktree = claimBody.workspace?.codeWorktree!; + assert.equal(fs.existsSync(codeWorktree), true); + assert.equal( + execFileSync("git", ["branch", "--show-current"], { cwd: codeWorktree, encoding: "utf-8" }).trim(), + "task/T1", + ); + assert.equal(fs.existsSync(path.join(codeWorktree, "README.md")), true); + }); + test("status done completes execution task directly when no reviewer is configured", async () => { await request("POST", "/tasks", { title: "Direct completion", diff --git a/middle/test/worktree.test.ts b/middle/test/worktree.test.ts new file mode 100644 index 0000000..dfa7b0a --- /dev/null +++ b/middle/test/worktree.test.ts @@ -0,0 +1,63 @@ +import { spawnSync } from "node:child_process"; +import * as assert from "node:assert/strict"; +import * as path from "node:path"; +import { test } from "node:test"; +import { fileURLToPath, pathToFileURL } from "node:url"; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../.."); +const journalModuleUrl = pathToFileURL(path.join(repoRoot, "middle/journal.ts")).href; +const worktreeModuleUrl = pathToFileURL(path.join(repoRoot, "middle/worktree.ts")).href; + +test("task workspace bootstrap stays silent on stderr for fresh branches", () => { + const script = [ + 'import * as fs from "node:fs";', + 'import * as os from "node:os";', + 'import * as path from "node:path";', + `import { initJournalRepo, createTaskBranch, taskBranch } from ${JSON.stringify(journalModuleUrl)};`, + `import { createWorktree } from ${JSON.stringify(worktreeModuleUrl)};`, + 'const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-worktree-noise-"));', + 'const repoPath = path.join(tmpDir, "journal");', + 'initJournalRepo(repoPath);', + 'createTaskBranch(repoPath, "1");', + 'createWorktree(repoPath, path.join(tmpDir, "journal-T1"), taskBranch("1"));', + ].join("\n"); + + const result = spawnSync(process.execPath, ["--import", "tsx", "-e", script], { + cwd: repoRoot, + encoding: "utf-8", + }); + + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.equal(result.stderr.trim(), ""); +}); + +test("createWorktree falls back to repo HEAD when start point is missing", () => { + const script = [ + 'import { execFileSync } from "node:child_process";', + 'import * as fs from "node:fs";', + 'import * as os from "node:os";', + 'import * as path from "node:path";', + `import { createWorktree } from ${JSON.stringify(worktreeModuleUrl)};`, + 'const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-worktree-fallback-"));', + 'const repoPath = path.join(tmpDir, "repo");', + 'fs.mkdirSync(repoPath, { recursive: true });', + 'execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" });', + 'execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" });', + 'execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" });', + 'fs.writeFileSync(path.join(repoPath, "README.md"), "# test\\n", "utf-8");', + 'execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" });', + 'execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" });', + 'const worktreePath = path.join(tmpDir, "code-T1");', + 'createWorktree(repoPath, worktreePath, "task/T1", "missing-base");', + 'const branch = execFileSync("git", ["branch", "--show-current"], { cwd: worktreePath, encoding: "utf-8" }).trim();', + 'if (branch !== "task/T1") throw new Error(`unexpected branch ${branch}`);', + 'if (!fs.existsSync(path.join(worktreePath, "README.md"))) throw new Error("README.md missing from fallback worktree");', + ].join("\n"); + + const result = spawnSync(process.execPath, ["--import", "tsx", "-e", script], { + cwd: repoRoot, + encoding: "utf-8", + }); + + assert.equal(result.status, 0, result.stderr || result.stdout); +}); diff --git a/middle/worktree.ts b/middle/worktree.ts index 2a4d959..a1f11af 100644 --- a/middle/worktree.ts +++ b/middle/worktree.ts @@ -23,13 +23,20 @@ export function createWorktree( // the worktree doesn't have the keys yet. const useGitCrypt = fs.existsSync(path.join(repoPath, ".git", "git-crypt")); const noCheckout = useGitCrypt ? "--no-checkout" : null; + const resolvedStartPoint = startPoint && refExists(repoPath, startPoint) + ? startPoint + : undefined; + const createBranch = !branchExists(repoPath, branch); const addArgs = (withNewBranch: boolean): string[] => { const args = ["worktree", "add"]; if (noCheckout) args.push(noCheckout); + if (withNewBranch) { + args.push("-b", branch); + } args.push(worktreePath); - if (withNewBranch && startPoint) { - args.push("-b", branch, startPoint); + if (withNewBranch) { + if (resolvedStartPoint) args.push(resolvedStartPoint); } else { args.push(branch); } @@ -38,11 +45,11 @@ export function createWorktree( const tryAdd = (): void => { try { - git(repoPath, addArgs(!!startPoint)); + git(repoPath, addArgs(createBranch)); } catch (err) { const msg = String(err); // Branch may already exist — retry without -b - if (startPoint && msg.includes("already exists")) { + if (createBranch && msg.includes("already exists")) { git(repoPath, addArgs(false)); } else { throw err; @@ -205,12 +212,35 @@ export function cleanupStaleWorktrees( // Helpers // --------------------------------------------------------------------------- +function branchExists(repoPath: string, branch: string): boolean { + return refExists(repoPath, `refs/heads/${branch}`); +} + +function refExists(repoPath: string, ref: string): boolean { + try { + execFileSync("git", ["rev-parse", "--verify", "--quiet", `${ref}^{commit}`], { + cwd: repoPath, + encoding: "utf-8", + timeout: 30_000, + stdio: ["ignore", "pipe", "pipe"], + env: { + ...process.env, + GIT_TERMINAL_PROMPT: "0", + }, + }); + return true; + } catch { + return false; + } +} + /** Run a git command. */ function git(cwd: string, args: string[]): string { return execFileSync("git", args, { cwd, encoding: "utf-8", timeout: 30_000, + stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, GIT_TERMINAL_PROMPT: "0", diff --git a/scripts/recover-leaked-leases.ts b/scripts/recover-leaked-leases.ts new file mode 100644 index 0000000..781f918 --- /dev/null +++ b/scripts/recover-leaked-leases.ts @@ -0,0 +1,94 @@ +type Phase = "analysis" | "decomposition" | "execution" | "review"; + +type AttemptBudget = { used: number; max: number }; + +type TaskRecord = { + id: string; + title: string; + phase: Phase | null; + condition: string | null; + terminal: string | null; + leasedTo: string | null; + leaseExpiresAt: number | null; + currentFenceToken: number; + attempts: Record; +}; + +type TasksResponse = { tasks: TaskRecord[] }; + +const apiBase = process.env["TASKCORE_BASE_URL"] ?? "http://127.0.0.1:18800"; +const dryRun = process.argv.includes("--dry-run"); +const retryDelayMs = 1_000; + +function nonEmptyText(value: string | null | undefined): value is string { + return typeof value === "string" && value.trim().length > 0; +} + +function leakedLeaseReason(task: TaskRecord, now: number): "lease_expired" | "orphaned_on_restart" { + if (task.leaseExpiresAt !== null && task.leaseExpiresAt <= now) { + return "lease_expired"; + } + return "orphaned_on_restart"; +} + +function attemptNumber(task: TaskRecord, phase: Phase): number { + return Math.max(1, task.attempts[phase]?.used ?? 0); +} + +async function main(): Promise { + const now = Date.now(); + const response = await fetch(`${apiBase}/tasks?full=true`); + if (!response.ok) { + throw new Error(`Failed to fetch tasks: ${response.status} ${response.statusText}`); + } + + const body = await response.json() as TasksResponse; + const leaked = body.tasks.filter((task) => + task.terminal === null && + task.phase !== null && + task.condition === "active" && + (!nonEmptyText(task.leasedTo) || task.leaseExpiresAt === null || task.leaseExpiresAt <= now) + ); + + if (leaked.length === 0) { + process.stdout.write("No leaked active leases found.\n"); + return; + } + + for (const task of leaked) { + const reason = leakedLeaseReason(task, now); + const payload = { + type: "RetryScheduled", + taskId: task.id, + ts: Date.now(), + fenceToken: task.currentFenceToken, + reason, + retryAfter: Date.now() + retryDelayMs, + phase: task.phase, + attemptNumber: attemptNumber(task, task.phase), + }; + + if (dryRun) { + process.stdout.write(`[dry-run] would recover T${task.id} (${task.title}) with ${reason}\n`); + continue; + } + + const recoverResponse = await fetch(`${apiBase}/tasks/${task.id}/events`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + if (!recoverResponse.ok) { + const errorBody = await recoverResponse.text(); + throw new Error(`Failed to recover T${task.id}: ${recoverResponse.status} ${errorBody}`); + } + + process.stdout.write(`Recovered T${task.id} (${task.title}) with ${reason}\n`); + } +} + +void main().catch((error) => { + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`${message}\n`); + process.exitCode = 1; +});