diff --git a/.task b/.task index 4f33323..d3a31c8 100644 --- a/.task +++ b/.task @@ -1,10 +1,10 @@ { - "taskId": "1282", + "taskId": "2123", "phase": "execution", - "fenceToken": 3, - "sessionId": "c893aa20-a7b7-4112-9cc3-68c12a747bf1", - "journalPath": "/tmp/taskcore-worktrees/journal-T1282/tasks/T1282/", - "codeWorktree": "/tmp/taskcore-worktrees/code-T1282", - "claimedAt": 1773098423616, + "fenceToken": 10, + "sessionId": "7be3c6a7-358e-45c7-b5af-6c1c5a51b82e", + "journalPath": "/tmp/taskcore-worktrees/journal-T2123/tasks/T2123/", + "codeWorktree": "/tmp/taskcore-worktrees/code-T2123", + "claimedAt": 1773404837466, "reviewNotes": [] } diff --git a/core/cli/plan-parse.ts b/core/cli/plan-parse.ts new file mode 100644 index 0000000..1da2167 --- /dev/null +++ b/core/cli/plan-parse.ts @@ -0,0 +1,258 @@ +/** + * plan-parse.ts — parse markdown-ish plans into child task specs and allocate costs. + * + * Supports: + * - Checklist items: `- [ ] title` or `- [x] title` + * - Ordered items: `1. title` + * - Bullet items: `- title` + * - Headings: `# Section` — prefix subsequent items' titles with "Section: " + * - Continuation lines: indented (2+ spaces) lines after an item enrich its description + * - Trailing meta: `(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true)` + */ + +export interface ParsedItem { + title: string; + description: string; + cost: number | undefined; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +export interface AllocatedItem { + title: string; + description: string; + cost: number; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +interface ItemMeta { + cost?: number; + assignee?: string; + reviewer?: string; + skipAnalysis?: boolean; +} + +function extractMeta(raw: string): { baseTitle: string; meta: ItemMeta } { + // Match trailing parenthetical: `title (key: val, key: val)` + const parenMatch = raw.match(/^(.*?)\s*\(([^)]+)\)\s*$/); + if (!parenMatch) return { baseTitle: raw.trim(), meta: {} }; + + const baseTitle = parenMatch[1]!.trim(); + const metaStr = parenMatch[2]!; + const meta: ItemMeta = {}; + + for (const pair of metaStr.split(",")) { + const colonIdx = pair.indexOf(":"); + if (colonIdx < 0) continue; + const key = pair.slice(0, colonIdx).trim().toLowerCase(); + const value = pair.slice(colonIdx + 1).trim(); + + switch (key) { + case "cost": { + const n = Number(value); + if (Number.isFinite(n) && n > 0) meta.cost = n; + break; + } + case "assignee": + if (value) meta.assignee = value; + break; + case "reviewer": + if (value) meta.reviewer = value; + break; + case "skip-analysis": + meta.skipAnalysis = value === "true" || value === "1" || value === "yes"; + break; + } + } + return { baseTitle, meta }; +} + +interface PendingItem { + title: string; + rawTitle: string; // item text without heading prefix, used as default description + cost: number | undefined; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +/** + * Parse a markdown-ish plan text into ParsedItem[]. + * + * Item markers: + * - `- [ ] title` / `- [x] title` — checklist + * - `1. title` — ordered list + * - `- title` — plain bullet + * + * Headings (`# …`) set a context prefix applied to all following items until + * the next heading. + * + * Indented lines (2+ spaces) following an item are appended to its description. + * + * A trailing parenthetical `(key: value, …)` on any item line supplies metadata. + */ +export function parsePlan(text: string): ParsedItem[] { + const lines = text.split("\n"); + const items: ParsedItem[] = []; + let headingContext = ""; + let pending: PendingItem | null = null; + const descLines: string[] = []; + + function flush(): void { + if (!pending) return; + const description = descLines.length > 0 ? descLines.join("\n") : pending.rawTitle; + items.push({ + title: pending.title, + description, + cost: pending.cost, + assignee: pending.assignee, + reviewer: pending.reviewer, + skipAnalysis: pending.skipAnalysis, + }); + pending = null; + descLines.length = 0; + } + + for (const line of lines) { + const trimmed = line.trim(); + + // Blank lines — skip (don't flush; a blank between item and continuation is fine) + if (trimmed === "") continue; + + // Heading: reset context prefix + const headingMatch = trimmed.match(/^#{1,6}\s+(.+)$/); + if (headingMatch) { + flush(); + headingContext = headingMatch[1]!.trim(); + continue; + } + + // Checklist: `- [ ] title` or `- [x] title` (any char inside brackets) + const checklistMatch = trimmed.match(/^-\s+\[[^\]]*\]\s+(.+)$/); + // Ordered: `1. title` (must not have already matched checklist) + const orderedMatch = !checklistMatch ? trimmed.match(/^\d+\.\s+(.+)$/) : null; + // Bullet: `- title` (only when neither of the above matched) + const bulletMatch = !checklistMatch && !orderedMatch ? trimmed.match(/^-\s+(.+)$/) : null; + + const itemText = checklistMatch?.[1] ?? orderedMatch?.[1] ?? bulletMatch?.[1]; + + if (itemText !== undefined) { + flush(); + const { baseTitle, meta } = extractMeta(itemText); + const prefix = headingContext ? `${headingContext}: ` : ""; + pending = { + title: prefix + baseTitle, + rawTitle: baseTitle, + cost: meta.cost, + assignee: meta.assignee, + reviewer: meta.reviewer, + skipAnalysis: meta.skipAnalysis ?? false, + }; + continue; + } + + // Continuation: indented line (2+ spaces or a tab) while we have a pending item + if (pending && /^[ \t]{2,}/.test(line)) { + descLines.push(trimmed); + continue; + } + + // Unrecognised line — flush pending (don't silently swallow item boundaries) + flush(); + } + + flush(); + return items; +} + +/** + * Allocate costs to items. Items with an explicit `cost` keep it; items without + * share the remaining budget evenly. Uses integer arithmetic (cents) to avoid + * float drift. + * + * Returns an error string on failure, or the allocated items on success. + */ +export function allocateCosts( + items: ParsedItem[], + budgetRemainingDollars: number, +): { ok: true; items: AllocatedItem[] } | { ok: false; error: string } { + if (items.length === 0) { + return { ok: false, error: "plan contains no items" }; + } + + // Work in integer cents to avoid float drift + const budgetCents = Math.round(budgetRemainingDollars * 100); + if (budgetCents <= 0) { + return { + ok: false, + error: `no positive budget remaining (${budgetRemainingDollars.toFixed(2)})`, + }; + } + + let specifiedCents = 0; + let unspecifiedCount = 0; + for (const item of items) { + if (item.cost !== undefined) { + specifiedCents += Math.round(item.cost * 100); + } else { + unspecifiedCount++; + } + } + + if (specifiedCents > budgetCents) { + return { + ok: false, + error: + `explicit costs (${(specifiedCents / 100).toFixed(2)}) exceed remaining budget` + + ` (${budgetRemainingDollars.toFixed(2)})`, + }; + } + + const remainingCents = budgetCents - specifiedCents; + let perItemCents = 0; + let extraCents = 0; + + if (unspecifiedCount > 0) { + if (remainingCents <= 0) { + return { + ok: false, + error: `no budget remains for ${unspecifiedCount} item(s) without explicit cost`, + }; + } + perItemCents = Math.floor(remainingCents / unspecifiedCount); + extraCents = remainingCents % unspecifiedCount; + if (perItemCents === 0) { + return { + ok: false, + error: + `remaining budget (${(remainingCents / 100).toFixed(2)}) too small to distribute` + + ` across ${unspecifiedCount} uncosted item(s)`, + }; + } + } + + // Distribute extra cents to the first items (deterministic, no float drift) + let extraGiven = 0; + const allocated: AllocatedItem[] = items.map((item) => { + let costCents: number; + if (item.cost !== undefined) { + costCents = Math.round(item.cost * 100); + } else { + costCents = perItemCents + (extraGiven < extraCents ? 1 : 0); + if (extraGiven < extraCents) extraGiven++; + } + return { + title: item.title, + description: item.description, + cost: costCents / 100, + assignee: item.assignee, + reviewer: item.reviewer, + skipAnalysis: item.skipAnalysis, + }; + }); + + return { ok: true, items: allocated }; +} diff --git a/core/cli/task.ts b/core/cli/task.ts index 19192a2..9d71699 100644 --- a/core/cli/task.ts +++ b/core/cli/task.ts @@ -11,6 +11,7 @@ import { filterClaimableTasks, includeReviewQueueTask, } from "./claimability.js"; +import { allocateCosts, parsePlan } from "./plan-parse.js"; // Handle EPIPE errors gracefully (e.g., when piping to head) process.stdout.on("error", (err: NodeJS.ErrnoException) => { @@ -535,7 +536,9 @@ function phaseGuidance(phase: string, condition: string): string[] { case "decomposition.active": return [ "You're decomposing this task. Next steps:", - " task decompose start # begin decomposition", + " task plan --file plan.md # materialize a whole checklist in one shot", + " task decompose plan --stdin # same flow, reading markdown from stdin", + " task decompose start # fallback: build children incrementally", " task decompose add <...> # add a child task", " task decompose commit # finalize children", " task decompose cancel # cancel decomposition", @@ -768,7 +771,8 @@ function printHelp(): void { " task update ", " task analyze", " task decide ", - " task decompose ...", + " task plan | --file | --stdin # plan-based decomposition", + " task decompose ...", " task review ...", " task journal ...", " task worktree", @@ -1520,10 +1524,12 @@ async function cmdClaim(argv: string[], jsonMode: boolean): Promise { process.stdout.write(" task review reject --force — reject permanently (prefer request-changes)\n"); process.stdout.write(" task review request-changes \"notes\" — request changes\n"); } else if (phase === "decomposition") { - process.stdout.write(" task decompose start — begin decomposition\n"); - process.stdout.write(" task decompose add \"...\" — add a child task\n"); - process.stdout.write(" task decompose commit — finalize children\n"); - process.stdout.write(" task decompose cancel — cancel decomposition\n"); + process.stdout.write(" task plan --file plan.md — commit a whole checklist in one shot\n"); + process.stdout.write(" task decompose plan --stdin — same flow, reading markdown from stdin\n"); + process.stdout.write(" task decompose start — fallback: begin incremental decomposition\n"); + process.stdout.write(" task decompose add \"...\" — add a child task\n"); + process.stdout.write(" task decompose commit — finalize children\n"); + process.stdout.write(" task decompose cancel — cancel decomposition\n"); } else { process.stdout.write(" task submit \"what you did\" — when done\n"); process.stdout.write(" task complete \"evidence\" — done, no review needed\n"); @@ -2005,9 +2011,221 @@ async function cmdDecompose(argv: string[], jsonMode: boolean): Promise { return; } + case "plan": { + const { flags } = parseFlags(args); + + // Read plan text from --items, --file, or --stdin + let planText: string; + const itemsFlag = getFlagString(flags, "items"); + const fileFlag = getFlagString(flags, "file"); + const useStdin = getFlagBool(flags, "stdin"); + + if (itemsFlag !== undefined) { + planText = itemsFlag; + } else if (fileFlag !== undefined) { + if (!fs.existsSync(fileFlag)) { + throw new CliError(`File not found: ${fileFlag}`, 1); + } + planText = fs.readFileSync(fileFlag, "utf-8"); + } else if (useStdin) { + planText = fs.readFileSync(0, "utf-8"); + } else { + throw new CliError( + "Usage: task decompose plan --items | --file | --stdin\n" + + " [--strategy sequential|parallel]", + 1, + ); + } + + const strategyRaw = getFlagString(flags, "strategy") ?? "sequential"; + if (strategyRaw !== "sequential" && strategyRaw !== "parallel") { + throw new CliError("--strategy must be 'sequential' or 'parallel'", 1); + } + const strategy = strategyRaw as "sequential" | "parallel"; + + // Parse the plan into items + const parsedItems = parsePlan(planText); + if (parsedItems.length === 0) { + throw new CliError( + "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.", + 1, + ); + } + + // Fetch parent task to determine remaining budget + const task = await getTask(taskId); + const costObj = asRecord(task["cost"]); + const budgetRemaining = costObj + ? (asNumber(costObj["allocated"]) ?? 0) + - (asNumber(costObj["consumed"]) ?? 0) + - (asNumber(costObj["childAllocated"]) ?? 0) + + (asNumber(costObj["childRecovered"]) ?? 0) + : 0; + + // Allocate costs (explicit + auto-distribution) + const allocation = allocateCosts(parsedItems, budgetRemaining); + if (!allocation.ok) { + throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1); + } + + // Build children array for the one-shot endpoint + const children = allocation.items.map((item) => { + const child: Record = { + title: item.title, + description: item.description, + costAllocation: item.cost, + }; + if (item.assignee) child["assignee"] = item.assignee; + if (item.reviewer) child["reviewer"] = item.reviewer; + if (item.skipAnalysis) child["skipAnalysis"] = true; + return child; + }); + + const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, { + children, + coordinationMode: strategy, + }); + + if (jsonMode) { + process.stdout.write(JSON.stringify(response, null, 2) + "\n"); + return; + } + + process.stdout.write("--- Decomposition committed ---\n\n"); + const responseChildren = asArray(response["children"]) + .map((c) => asRecord(c)) + .filter((c): c is Record => c !== null); + process.stdout.write(`Created ${responseChildren.length} children:\n`); + for (const child of responseChildren) { + process.stdout.write( + ` T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")} ${formatMoney(child["costAllocation"])}\n`, + ); + } + + const planAgentId = process.env["TASKCORE_AGENT_ID"]; + if (planAgentId) clearActiveTask(planAgentId); + return; + } + default: - throw new CliError("Usage: task decompose ...", 1); + throw new CliError("Usage: task decompose ...", 1); + } +} + +/** + * `task plan` — top-level shortcut for plan-based decomposition. + * + * Usage: + * task plan "- [ ] step one\n- [ ] step two" + * task plan --file plan.md + * task plan --stdin + * task plan "- step one" --strategy parallel + * + * Equivalent to `task decompose plan` but more natural for agents. + * Accepts plan text as a positional arg (most common), --file, or --stdin. + */ +async function cmdPlan(argv: string[], jsonMode: boolean): Promise { + requireAgentId(); + const taskId = currentTaskId(); + const { positionals, flags } = parseFlags(argv); + + // Read plan text from positional arg, --file, or --stdin + let planText: string; + const fileFlag = getFlagString(flags, "file"); + const useStdin = getFlagBool(flags, "stdin"); + + if (positionals.length > 0) { + planText = positionals.join(" "); + } else if (fileFlag !== undefined) { + if (!fs.existsSync(fileFlag)) { + throw new CliError(`File not found: ${fileFlag}`, 1); + } + planText = fs.readFileSync(fileFlag, "utf-8"); + } else if (useStdin) { + planText = fs.readFileSync(0, "utf-8"); + } else { + throw new CliError( + "Usage: task plan | --file | --stdin\n" + + " [--strategy sequential|parallel]\n" + + "\n" + + "Plan format:\n" + + " - [ ] Step title (cost: 10, assignee: coder)\n" + + " 1. Ordered step\n" + + " - Bullet step\n" + + " # Section heading\n" + + " Indented lines extend item descriptions.", + 1, + ); + } + + const strategyRaw = getFlagString(flags, "strategy") ?? "sequential"; + if (strategyRaw !== "sequential" && strategyRaw !== "parallel") { + throw new CliError("--strategy must be 'sequential' or 'parallel'", 1); + } + const strategy = strategyRaw as "sequential" | "parallel"; + + // Parse the plan + const parsedItems = parsePlan(planText); + if (parsedItems.length === 0) { + throw new CliError( + "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.", + 1, + ); } + + // Fetch task to determine remaining budget + const task = await getTask(taskId); + const costObj = asRecord(task["cost"]); + const budgetRemaining = costObj + ? (asNumber(costObj["allocated"]) ?? 0) + - (asNumber(costObj["consumed"]) ?? 0) + - (asNumber(costObj["childAllocated"]) ?? 0) + + (asNumber(costObj["childRecovered"]) ?? 0) + : 0; + + // Allocate costs + const allocation = allocateCosts(parsedItems, budgetRemaining); + if (!allocation.ok) { + throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1); + } + + // Build children array + const children = allocation.items.map((item) => { + const child: Record = { + title: item.title, + description: item.description, + costAllocation: item.cost, + }; + if (item.assignee) child["assignee"] = item.assignee; + if (item.reviewer) child["reviewer"] = item.reviewer; + if (item.skipAnalysis) child["skipAnalysis"] = true; + return child; + }); + + const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, { + children, + coordinationMode: strategy, + }); + + if (jsonMode) { + process.stdout.write(JSON.stringify(response, null, 2) + "\n"); + return; + } + + process.stdout.write(`--- Plan materialized for T${taskId} ---\n\n`); + const responseChildren = asArray(response["children"]) + .map((c) => asRecord(c)) + .filter((c): c is Record => c !== null); + process.stdout.write(`Created ${responseChildren.length} child tasks:\n`); + for (const child of responseChildren) { + process.stdout.write( + ` T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")} ${formatMoney(child["costAllocation"])}\n`, + ); + } + process.stdout.write("\nParent task is now a coordinator. Children will execute the plan.\n"); + + const planAgentId = process.env["TASKCORE_AGENT_ID"]; + if (planAgentId) clearActiveTask(planAgentId); } function ensureContextWithTaskId(taskId: string): TaskContext { @@ -2606,6 +2824,27 @@ const subcommandHelp: Record = { " decompose Task will be split into subtasks", ].join("\n"), + plan: [ + "task plan | --file | --stdin", + "", + "One-shot plan-based decomposition. Parses a markdown plan into", + "child tasks and materializes them under the current task.", + "Equivalent to `task decompose plan` but accepts plan as a positional arg.", + "", + "Options:", + " --file Read plan from file", + " --stdin Read plan from stdin", + " --strategy sequential|parallel Coordination mode (default: sequential)", + "", + "Plan format:", + " - [ ] Step title (cost: 10, assignee: coder)", + " 1. Ordered step", + " - Bullet step", + " # Section heading — prefixes following items with 'Section: '", + " Indented lines (2+ spaces) extend the item description.", + " Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis", + ].join("\n"), + decompose: [ "task decompose ...", "", @@ -2620,6 +2859,19 @@ const subcommandHelp: Record = { " --skip-analysis Skip analysis phase for child", " commit Commit the decomposition", " cancel Cancel pending decomposition", + " plan One-shot plan-based decomposition", + " --items Inline plan text", + " --file Read plan from file", + " --stdin Read plan from stdin", + " --strategy sequential|parallel Coordination mode (default: sequential)", + "", + "Plan format (for 'plan' subcommand):", + " - [ ] Step title (cost: 10, assignee: coder)", + " 1. Ordered step", + " - Bullet step", + " # Section heading — prefixes following items with 'Section: '", + " Indented lines (2+ spaces) extend the item description.", + " Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis", ].join("\n"), review: [ @@ -2799,6 +3051,9 @@ async function run(argv: string[]): Promise { case "decide": await cmdDecide(rest, jsonMode); return; + case "plan": + await cmdPlan(rest, jsonMode); + return; case "decompose": await cmdDecompose(rest, jsonMode); return; diff --git a/core/test/cli-plan.test.ts b/core/test/cli-plan.test.ts new file mode 100644 index 0000000..4d50269 --- /dev/null +++ b/core/test/cli-plan.test.ts @@ -0,0 +1,221 @@ +import * as http from "node:http"; +import { spawnSync } from "node:child_process"; +import { beforeEach, afterEach, test } from "node:test"; +import * as assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { OrchestrationCore } from "../index.js"; +import { createHttpServer } from "../../middle/http.js"; +import { loadConfig, type Config } from "../../middle/config.js"; +import { initJournalRepo } from "../../middle/journal.js"; + +let server: http.Server; +let core: OrchestrationCore; +let tmpDir: string; +let port: number; +let config: Config; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../.."); + +function request( + method: string, + urlPath: string, + body?: unknown, +): Promise<{ status: number; body: unknown }> { + return new Promise((resolve, reject) => { + const data = body ? JSON.stringify(body) : undefined; + const req = http.request( + { + hostname: "127.0.0.1", + port, + path: urlPath, + method, + headers: data + ? { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + } + : {}, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (chunk: Buffer) => chunks.push(chunk)); + res.on("end", () => { + const raw = Buffer.concat(chunks).toString("utf-8"); + try { + resolve({ status: res.statusCode ?? 500, body: JSON.parse(raw) }); + } catch { + resolve({ status: res.statusCode ?? 500, body: raw }); + } + }); + }, + ); + req.on("error", reject); + if (data) req.write(data); + req.end(); + }); +} + +async function setup(): Promise { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-cli-plan-")); + const dbPath = path.join(tmpDir, "test.db"); + port = 19800 + Math.floor(Math.random() * 1000); + const journalRepoPath = path.join(tmpDir, "journal"); + const worktreeBaseDir = path.join(tmpDir, "worktrees"); + const workspaceDir = path.join(tmpDir, "workspace"); + const agentRegistry = path.join(tmpDir, "registry.json"); + + fs.mkdirSync(workspaceDir, { recursive: true }); + fs.mkdirSync(path.join(workspaceDir, "data"), { recursive: true }); + fs.writeFileSync(agentRegistry, JSON.stringify({ + agents: [ + { id: "coder", assignable: true, reviewer: true, consulted: true }, + { id: "overseer", assignable: true, reviewer: true, consulted: true }, + ], + }, null, 2)); + initJournalRepo(journalRepoPath); + + core = new OrchestrationCore({ + dbPath, + invariantChecks: true, + snapshotEvery: 50, + }); + + config = { + ...loadConfig(), + port, + dbPath, + agentRegistry, + workspaceDir, + journalRepoPath, + worktreeBaseDir, + runtimeFile: "", + }; + + server = createHttpServer(core, config); + await new Promise((resolve) => { + server.listen(port, "127.0.0.1", resolve); + }); +} + +async function teardown(): Promise { + await new Promise((resolve) => server.close(() => resolve())); + core.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +} + +beforeEach(setup); +afterEach(teardown); + +test("task plan materializes a markdown checklist into child tasks end-to-end", async () => { + const createRes = await request("POST", "/tasks", { + title: "Plan import parent", + description: "Verify task plan end-to-end", + assignee: "coder", + costBudget: 25, + }); + assert.equal(createRes.status, 201); + + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const planPath = path.join(tmpDir, "plan.md"); + fs.writeFileSync(planPath, [ + "# Planning", + "- [ ] Parse markdown checklist (cost: 5, assignee: coder)", + " Capture headings and metadata.", + "- [ ] Materialize children (reviewer: overseer)", + " Verify the parent/child shape in taskcore/dashboard.", + ].join("\n")); + + const cli = spawnSync( + process.execPath, + ["--import", "tsx", "core/cli/task.ts", "plan", "--file", planPath], + { + cwd: repoRoot, + env: { + ...process.env, + ORCHESTRATOR_PORT: String(port), + TASKCORE_AGENT_ID: "coder", + TASK_ID: "1", + }, + encoding: "utf-8", + }, + ); + + assert.equal(cli.status, 0, `stdout:\n${cli.stdout}\n\nstderr:\n${cli.stderr}`); + assert.match(cli.stdout, /Plan materialized for T1/); + assert.match(cli.stdout, /Created 2 child tasks/); + + const parentRes = await request("GET", "/tasks/1"); + assert.equal(parentRes.status, 200); + const parent = (parentRes.body as { + task: { + phase: string; + condition: string; + children: string[]; + coordination?: { mode?: string }; + }; + }).task; + + assert.equal(parent.phase, "analysis"); + assert.equal(parent.condition, "waiting"); + assert.deepEqual(parent.children, ["2", "3"]); + assert.equal(parent.coordination?.mode, "sequential_children"); + + const childrenRes = await request("GET", "/tasks?parentId=1&full=true"); + assert.equal(childrenRes.status, 200); + const children = (childrenRes.body as { + tasks: Array<{ + id: string; + title: string; + description: string; + phase: string; + condition: string; + metadata: { assignee?: string; reviewer?: string }; + cost: { allocated: number }; + }>; + }).tasks; + + assert.equal(children.length, 2); + assert.deepEqual( + children.map((child) => ({ + id: child.id, + title: child.title, + description: child.description, + phase: child.phase, + condition: child.condition, + assignee: child.metadata.assignee, + reviewer: child.metadata.reviewer, + allocated: child.cost.allocated, + })), + [ + { + id: "2", + title: "Planning: Parse markdown checklist", + description: "Capture headings and metadata.", + phase: "analysis", + condition: "ready", + assignee: "coder", + reviewer: undefined, + allocated: 5, + }, + { + id: "3", + title: "Planning: Materialize children", + description: "Verify the parent/child shape in taskcore/dashboard.", + phase: "analysis", + condition: "waiting", + assignee: undefined, + reviewer: "overseer", + allocated: 20, + }, + ], + ); +}); diff --git a/core/test/plan-parse.test.ts b/core/test/plan-parse.test.ts new file mode 100644 index 0000000..e1e4543 --- /dev/null +++ b/core/test/plan-parse.test.ts @@ -0,0 +1,269 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { allocateCosts, parsePlan } from "../cli/plan-parse.js"; + +// --------------------------------------------------------------------------- +// Parser tests +// --------------------------------------------------------------------------- + +test("parsePlan: checklist items", () => { + const items = parsePlan("- [ ] First step\n- [ ] Second step"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "First step"); + assert.equal(items[1]!.title, "Second step"); +}); + +test("parsePlan: checked and unchecked checklist both parse", () => { + const items = parsePlan("- [ ] Pending\n- [x] Done\n- [X] Also done"); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Pending"); + assert.equal(items[1]!.title, "Done"); + assert.equal(items[2]!.title, "Also done"); +}); + +test("parsePlan: ordered list items", () => { + const items = parsePlan("1. Alpha\n2. Beta\n3. Gamma"); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Alpha"); + assert.equal(items[1]!.title, "Beta"); + assert.equal(items[2]!.title, "Gamma"); +}); + +test("parsePlan: plain bullet items", () => { + const items = parsePlan("- Do thing A\n- Do thing B"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "Do thing A"); + assert.equal(items[1]!.title, "Do thing B"); +}); + +test("parsePlan: headings prefix subsequent items", () => { + const items = parsePlan("# Setup\n- Install deps\n# Cleanup\n- Remove temp files"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "Setup: Install deps"); + assert.equal(items[1]!.title, "Cleanup: Remove temp files"); +}); + +test("parsePlan: heading context resets on new heading", () => { + const items = parsePlan("# Phase 1\n- Item A\n# Phase 2\n- Item B"); + assert.equal(items[0]!.title, "Phase 1: Item A"); + assert.equal(items[1]!.title, "Phase 2: Item B"); +}); + +test("parsePlan: no heading means no prefix", () => { + const items = parsePlan("- Plain item"); + assert.equal(items[0]!.title, "Plain item"); +}); + +test("parsePlan: indented continuation lines become description", () => { + const items = parsePlan( + "- Do the thing\n With extra context here\n And more details", + ); + assert.equal(items.length, 1); + assert.equal(items[0]!.title, "Do the thing"); + assert.equal(items[0]!.description, "With extra context here\nAnd more details"); +}); + +test("parsePlan: description defaults to item text when no continuations", () => { + const items = parsePlan("- Simple item"); + assert.equal(items[0]!.description, "Simple item"); +}); + +test("parsePlan: blank lines between items are ignored", () => { + const items = parsePlan("- First\n\n- Second\n\n- Third"); + assert.equal(items.length, 3); +}); + +test("parsePlan: unrecognised prose flushes the pending item", () => { + const items = parsePlan([ + "- First step", + "This paragraph should not become a continuation line", + "- Second step", + ].join("\n")); + + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "First step"); + assert.equal(items[0]!.description, "First step"); + assert.equal(items[1]!.title, "Second step"); +}); + +test("parsePlan: inline cost metadata", () => { + const items = parsePlan("- Do something (cost: 15)"); + assert.equal(items[0]!.title, "Do something"); + assert.equal(items[0]!.cost, 15); +}); + +test("parsePlan: inline assignee and reviewer", () => { + const items = parsePlan("- Do something (assignee: coder, reviewer: overseer)"); + assert.equal(items[0]!.assignee, "coder"); + assert.equal(items[0]!.reviewer, "overseer"); +}); + +test("parsePlan: skip-analysis flag true", () => { + const items = parsePlan("- Trivial task (skip-analysis: true)"); + assert.equal(items[0]!.skipAnalysis, true); +}); + +test("parsePlan: skip-analysis flag false", () => { + const items = parsePlan("- Normal task (skip-analysis: false)"); + assert.equal(items[0]!.skipAnalysis, false); +}); + +test("parsePlan: combined metadata", () => { + const items = parsePlan( + "- Full item (cost: 20, assignee: coder, reviewer: lead, skip-analysis: true)", + ); + assert.equal(items[0]!.title, "Full item"); + assert.equal(items[0]!.cost, 20); + assert.equal(items[0]!.assignee, "coder"); + assert.equal(items[0]!.reviewer, "lead"); + assert.equal(items[0]!.skipAnalysis, true); +}); + +test("parsePlan: metadata stripped from title", () => { + const items = parsePlan("- Fix the bug (cost: 5)"); + assert.equal(items[0]!.title, "Fix the bug"); + assert.equal(items[0]!.cost, 5); +}); + +test("parsePlan: heading prefix not applied to description", () => { + const items = parsePlan("# Phase\n- Task title"); + assert.equal(items[0]!.title, "Phase: Task title"); + assert.equal(items[0]!.description, "Task title"); // description has no heading prefix +}); + +test("parsePlan: mixed formats in one plan", () => { + const plan = [ + "# Bootstrap", + "- [ ] Install dependencies (cost: 5)", + "1. Configure environment (cost: 10, assignee: infra)", + "# Feature", + "- Build the thing", + " Detailed instructions here", + ].join("\n"); + + const items = parsePlan(plan); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Bootstrap: Install dependencies"); + assert.equal(items[0]!.cost, 5); + assert.equal(items[1]!.title, "Bootstrap: Configure environment"); + assert.equal(items[1]!.cost, 10); + assert.equal(items[1]!.assignee, "infra"); + assert.equal(items[2]!.title, "Feature: Build the thing"); + assert.equal(items[2]!.description, "Detailed instructions here"); +}); + +// --------------------------------------------------------------------------- +// Cost allocation tests +// --------------------------------------------------------------------------- + +test("allocateCosts: errors on empty items", () => { + const result = allocateCosts([], 100); + assert.equal(result.ok, false); +}); + +test("allocateCosts: errors on zero budget", () => { + const items = parsePlan("- Step"); + const result = allocateCosts(items, 0); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no positive budget/); +}); + +test("allocateCosts: errors on negative budget", () => { + const items = parsePlan("- Step"); + const result = allocateCosts(items, -5); + assert.equal(result.ok, false); +}); + +test("allocateCosts: uses explicit costs as-is", () => { + const items = parsePlan("- Step A (cost: 10)\n- Step B (cost: 20)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); + assert.equal(result.items[1]!.cost, 20); +}); + +test("allocateCosts: distributes remaining budget evenly", () => { + const items = parsePlan("- A\n- B\n- C"); + const result = allocateCosts(items, 30); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); + assert.equal(result.items[1]!.cost, 10); + assert.equal(result.items[2]!.cost, 10); +}); + +test("allocateCosts: distributes remainder cents to first items", () => { + // $10.01 across 3 items → 3.34, 3.34, 3.33 (1 extra cent to first two) + const items = parsePlan("- A\n- B\n- C"); + const result = allocateCosts(items, 10.01); + assert.equal(result.ok, true); + if (!result.ok) return; + const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0); + assert.equal(totalCents, 1001); + assert.equal(result.items[0]!.cost, 3.34); + assert.equal(result.items[1]!.cost, 3.34); + assert.equal(result.items[2]!.cost, 3.33); +}); + +test("allocateCosts: mixes explicit and auto-distributed costs", () => { + const items = parsePlan("- A (cost: 10)\n- B\n- C"); + const result = allocateCosts(items, 30); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); // explicit + assert.equal(result.items[1]!.cost, 10); // (30 - 10) / 2 + assert.equal(result.items[2]!.cost, 10); +}); + +test("allocateCosts: total equals budget when all auto", () => { + const items = parsePlan("- A\n- B"); + const result = allocateCosts(items, 50); + assert.equal(result.ok, true); + if (!result.ok) return; + const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0); + assert.equal(totalCents, 5000); +}); + +test("allocateCosts: errors if explicit costs exceed budget", () => { + const items = parsePlan("- A (cost: 60)\n- B (cost: 50)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /exceed/); +}); + +test("allocateCosts: errors if no budget left for unspecified items", () => { + const items = parsePlan("- A (cost: 100)\n- B"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no budget/); +}); + +test("allocateCosts: preserves assignee and reviewer on items", () => { + const items = parsePlan("- Task (cost: 10, assignee: coder, reviewer: lead)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.assignee, "coder"); + assert.equal(result.items[0]!.reviewer, "lead"); +}); + +test("allocateCosts: preserves skipAnalysis flag", () => { + const items = parsePlan("- Task (cost: 10, skip-analysis: true)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.skipAnalysis, true); +}); + +test("allocateCosts: single item gets full budget when no cost specified", () => { + const items = parsePlan("- Single task"); + const result = allocateCosts(items, 25); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 25); +}); diff --git a/docs/ops/t1710-capability-first-orchestration-hardening-plan.md b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md new file mode 100644 index 0000000..699d221 --- /dev/null +++ b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md @@ -0,0 +1,159 @@ +# T1710 — Capability-First Orchestration Hardening + +## Executive recommendation + +This program is **too large and cross-cutting to execute as a single task**. It should be decomposed into ordered child tasks with one parent artifact that fixes the architecture, sequencing, and acceptance criteria. + +The failure pattern is not "one bug". It is a systems problem caused by trying to decompose and execute uncertain missions **before** the system knows: +- what capabilities are actually available, +- which prerequisites are missing, +- which steps are reversible vs. irreversible, +- how partial progress should be preserved, +- when repeated failure should trigger a strategy change instead of more retries. + +## Problem framing + +The target workflow class has five characteristics: +1. **High uncertainty at start** — entity matching, environment state, or account status may be unknown. +2. **Browser-mediated execution** — progress depends on live UI state, auth, and fragile selectors. +3. **Partially irreversible actions** — clicks, submissions, trades, messages, or confirmations may have consequences. +4. **Mixed research + execution** — discovery work is often bundled with deterministic action steps. +5. **Infrastructure noise** — browser relay, auth state, tool health, and mutation-path failures create false task churn. + +If the orchestrator treats these as ordinary deterministic tasks, it creates the same failure loop: +- decompose too early, +- assign execution before readiness, +- lose partial findings in review/aggregate handoffs, +- retry the same failing path, +- escalate risk near irreversible steps. + +## Strategic design principles + +### 1. Capability-first before decomposition +Before generating child tasks, the system should produce a **mission capability snapshot**: +- available tools and runtimes, +- authenticated systems/accounts, +- browser availability and attachment state, +- permission constraints, +- verification channels, +- human-approval requirements, +- known blockers. + +If readiness is low, the system should create **prerequisite tasks** first, not execution tasks. + +### 2. Separate discovery from deterministic execution +A mission should not begin with an execution plan when the main unknown is still identification, feasibility, auth, or state verification. + +Use two lanes: +- **Discovery lane**: identify entities, inspect environment, map options, and gather evidence. +- **Execution lane**: perform deterministic, validated steps only after inputs are stable. + +This preserves operator clarity and reduces bogus "execution failures" that are really unresolved discovery problems. + +### 3. Preserve partial progress as first-class artifacts +When a child task uncovers verified facts but cannot finish the end-to-end mission, that output must survive reviews, retries, and replanning. + +Required artifact types: +- capability snapshots, +- matched entities / rejected candidates, +- prerequisite checklist state, +- evidence bundles, +- environment fingerprints, +- execution-ready plans, +- approval packets for irreversible actions. + +The parent should aggregate these artifacts instead of forcing children into a binary success/fail shape. + +### 4. Fingerprint failure modes, then switch strategy +Repeated retries are only rational if the failure mode is transient. The orchestrator should classify failures into buckets such as: +- auth missing/expired, +- browser relay unattached, +- selector / UI drift, +- external system ambiguity, +- runtime/tool unavailable, +- mutation accepted but verification unavailable, +- approval required but not granted. + +Each class needs a defined next action: retry, reroute, decompose prerequisite, request approval, or stop. + +### 5. Approval-gated lane for irreversible actions +Irreversible or safety-sensitive actions should require a specific lane with: +- explicit action summary, +- preconditions satisfied, +- target/entity verified, +- rollback possibilities documented, +- approval token or human confirmation captured, +- post-action verification defined. + +This should not share the same semantics as low-risk research tasks. + +## Proposed decomposition + +### Child 1 — Mission capability registry and readiness model +**Goal:** define machine-readable representation of capabilities, prerequisites, and readiness scoring. +**Output:** schema + readiness levels + examples + integration points. + +### Child 2 — Execution preflight and prerequisite detection +**Goal:** build the gate that runs before decomposition/execution to detect missing auth, tools, browser state, permissions, and verification channels. +**Output:** preflight rules, fail-fast decisions, prerequisite task generation rules. + +### Child 3 — Grounding and uncertain-entity evaluation framework +**Goal:** handle missions where the target entity, account, page, or record is uncertain. +**Output:** match confidence model, evidence requirements, safe stopping conditions. + +### Child 4 — Failure fingerprinting and strategy switching +**Goal:** stop naive retry loops and route repeated failures to the right next strategy. +**Output:** failure taxonomy, retry budgets, switching rules, observability requirements. +**Design artifact:** `docs/ops/t1712-failure-fingerprinting-strategy-switching.md` + +### Child 5 — Approval-gated irreversible-action lane +**Goal:** create a separate workflow for steps with material consequences. +**Output:** approval packet schema, gate conditions, execution/verification semantics. + +### Child 6 — Aggregate policy and artifact-first closure semantics +**Goal:** preserve partial progress through review and parent aggregation. +**Output:** child completion semantics, artifact contract, parent merge rules, review checklist. + +### Child 7 — Cross-repo implementation and validation plan +**Goal:** map where changes belong across taskcore and colony, sequence rollout, and define tests. +**Output:** implementation order, repo ownership, migration plan, acceptance tests. + +## Ordering recommendation + +Recommended execution order: +1. Capability registry and readiness model +2. Execution preflight and prerequisite detection +3. Grounding / uncertain-entity evaluation +4. Failure fingerprinting and strategy switching +5. Approval-gated lane for irreversible actions +6. Aggregate policy and artifact-first closure semantics +7. Cross-repo implementation and validation plan + +Rationale: +- readiness and preflight are foundation layers, +- grounding determines whether execution should even begin, +- failure handling is only useful after readiness semantics exist, +- approval gating depends on stable preconditions and verification semantics, +- aggregate closure should be shaped after artifact types are defined, +- implementation planning should be last so it reflects the final architecture. + +## Acceptance criteria for the parent task + +T1710 should only be considered complete when it produces: +- a parent architecture memo with the final system model, +- child tasks covering all six functional areas plus rollout/validation, +- explicit artifact contracts between children, +- a recommended order of implementation, +- concrete acceptance tests for the integrated workflow. + +## What not to do + +- Do **not** patch a single historical workflow. +- Do **not** encode browser-specific hacks as general orchestration policy. +- Do **not** collapse discovery, execution, and approval into one task type. +- Do **not** use success/failure alone as the parent aggregation model. +- Do **not** allow irreversible execution without explicit preconditions and approval semantics. + +## Immediate next move + +Decompose T1710 into the ordered child tasks above, using domain-agnostic language and artifact-focused outputs. The parent remains responsible for the integrated architecture and rollout sequence. diff --git a/docs/ops/t1712-failure-fingerprinting-strategy-switching.md b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md new file mode 100644 index 0000000..576c279 --- /dev/null +++ b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md @@ -0,0 +1,449 @@ +# T1712 — Failure fingerprinting, strategy switching, and dynamic retry budgets + +## Problem statement + +TaskCore currently treats most agent failures as variations of the same event: +- `task-executor.mjs` increments a single `retryCount` +- work is re-queued with generic exponential backoff +- only rate limits get a distinct path +- once `MAX_RETRIES` is exhausted, the task is simply blocked + +That is too lossy for uncertain, browser-mediated, or dependency-heavy work. The system cannot distinguish: +- a stale login that should wake a shared auth blocker +- anti-bot / access denial that should halt sibling work on the same target +- missing inputs that should create a prerequisite task instead of more retries +- invalid state transitions that require replanning, not repetition +- verification failures after a mutation, where autonomy should stop and escalate +- repeated cost / provider exhaustion, where dispatch should shift strategy globally + +The result is sibling churn: multiple leaves burn retries on the same blocker even when the next rational move is shared blocker-removal or mission replanning. + +--- + +## Design goals + +1. **Canonical failure identity** — repeated failures with the same operative cause produce the same fingerprint. +2. **Scope-aware routing** — leaf-local failures stay local; shared blockers fan in to one prerequisite task; global exhaustion triggers broader throttling. +3. **Strategy switching over blind retries** — retry only when the failure class is plausibly transient. +4. **Task-kind-aware budgets** — execution, aggregate, and artifact-only tasks do not share the same retry policy. +5. **Inspectable decisions** — every retry, pause, reroute, and blocker promotion is visible in task metadata and executor outcome logs. +6. **Fail-closed near irreversible work** — verification and state-transition failures on execution tasks escalate before more automation is attempted. + +### Non-goals + +- Replacing provider allocation gating from T755. +- Replacing the recovery breaker engine for host/service remediation. +- Solving target grounding/entity ambiguity (that belongs to T1704/T2110-style grounding work). + +--- + +## Where this policy plugs in + +Primary integration points: +1. **`scripts/task-executor.mjs`** — authoritative classification, retry budgeting, and routing. +2. **task metadata in `.taskmaster/tasks/tasks.json`** — persistent fingerprint, counters, blocker linkage, and route decisions. +3. **executor outcome log** (`data/task-dashboard/executor_outcomes.jsonl`) — append fingerprint + routing evidence. +4. **dashboard export / APIs** — surface repeated failure clusters, promoted blockers, and retry-budget exhaustion. + +This task defines the contract and routing model. A later execution task should implement the plumbing. + +--- + +## 1) Canonical failure fingerprint model + +A **failure fingerprint** is the deduplicated identity of a task failure for orchestration purposes. + +### Proposed schema + +```json +{ + "version": "v1", + "fingerprintId": "ffp_01HV...", + "taskId": 1712, + "taskKind": "execution", + "runPhase": "work", + "failureClass": "auth_session_failure", + "scope": "shared_prerequisite", + "resourceKey": "telegram:account:primary", + "surface": "browser_relay", + "reasonCode": "session_expired", + "signature": { + "provider": "openai-codex", + "model": "gpt-5.4", + "exitType": "agent_crash", + "stderrClass": "login_required", + "targetRef": "telegram-web" + }, + "dedupeKey": "auth_session_failure|telegram:account:primary|browser_relay|session_expired", + "firstSeenAt": "2026-03-13T09:00:00Z", + "lastSeenAt": "2026-03-13T09:07:00Z", + "attempts": 3, + "affectedTaskIds": [1712, 1718, 1721], + "recommendedStrategy": "wake_or_create_blocker", + "recommendedBlockerKey": "blocker:auth:telegram:account:primary" +} +``` + +### Required fields + +| Field | Meaning | +|---|---| +| `taskKind` | `execution`, `aggregate`, `artifact_only`, `review`, `capability_probe`, etc. | +| `runPhase` | `work` or `review` | +| `failureClass` | canonical orchestrator-facing class | +| `scope` | `leaf_local`, `shared_prerequisite`, or `global_budget` | +| `resourceKey` | normalized target of the blocker (`browser:relay:chrome`, `human:kas`, `provider:openai-codex`) | +| `reasonCode` | finer-grained sub-cause | +| `dedupeKey` | stable routing key used to collapse repeats | +| `attempts` | count of repeated hits within policy window | +| `recommendedStrategy` | `retry_same_path`, `switch_strategy`, `wake_or_create_blocker`, `pause_for_review`, `global_throttle` | + +### Scope semantics + +- **`leaf_local`** — retry/replan only this task. Example: malformed prompt for one artifact-only task. +- **`shared_prerequisite`** — stop retrying sibling leaves and promote a shared blocker. Example: expired auth, missing credential, inaccessible website. +- **`global_budget`** — provider/cost saturation or broad platform outage; gate future dispatch and avoid local churn. + +--- + +## 2) Canonical failure classes + +These are the minimum classes required for T1712 acceptance. + +| Failure class | Typical signals | Default scope | Default strategy | +|---|---|---|---| +| `auth_session_failure` | login required, expired cookie/session, wallet disconnected, missing permission grant | `shared_prerequisite` | pause affected leaves, wake/create auth blocker | +| `access_denial_antibot` | captcha, 403, antibot page, WAF deny, account challenge | `shared_prerequisite` | stop automation path, request human unblock / alternate channel | +| `missing_input` | required ID/file/approval/parameter absent | `shared_prerequisite` if shared, else `leaf_local` | create prerequisite task or request user input | +| `invalid_state_transition` | task tries action from wrong state, precondition invalid, already-submitted/closed/cancelled | `leaf_local` or `shared_prerequisite` if state is mission-wide | replan from refreshed state; no same-path retry | +| `verification_failure` | mutation possibly happened but postcondition cannot be proven; conflicting checks | `leaf_local` on single leaf, fail-closed for execution | halt autonomy, require verification/review path | +| `cost_exhaustion_repeated` | provider denied for budget/quota reasons across attempts/windows | `global_budget` | throttle dispatch, shift provider/model/priority policy | + +Recommended additional classes for implementation completeness: +- `tool_runtime_unavailable` +- `ui_selector_drift` +- `rate_limit_transient` +- `dependency_blocked` +- `human_approval_missing` + +### Failure-class notes + +#### `auth_session_failure` +Examples: +- browser relay attached but session logged out +- API token missing or expired +- wallet connector present but no connected account + +Rule: after the second matching hit within the policy window, stop local retries and create/wake one shared auth blocker keyed by the affected account/resource. + +#### `access_denial_antibot` +Rule: never let sibling leaves keep probing the same blocked surface. Switch to a human-assisted or alternate-channel strategy immediately after first confirmed match. + +#### `missing_input` +Rule: if the missing input is shared by multiple children (e.g. target account id, approval token, attachment), collapse it into one prerequisite task and mark dependent leaves as waiting/blocked-by-dependency. + +#### `invalid_state_transition` +Rule: do not spend retry budget repeating an action against a stale assumption. Refresh state, then either replan or close as not-applicable. + +#### `verification_failure` +Rule: for execution tasks, verification failure after a mutation is **not retry-equivalent** to a normal crash. The system must stop and request human review or explicit verification work. + +#### `cost_exhaustion_repeated` +Rule: once the same quota/budget fingerprint repeats across tasks or time windows, it becomes a dispatch-policy problem, not a leaf problem. Route to global throttling or provider switch. + +--- + +## 3) Fingerprint derivation rules + +The executor should derive a fingerprint in four passes: + +1. **Normalize runtime facts** + - task kind + - run phase + - assignee/reviewer + - exit code / signal + - error tail classification + - known provider/model metadata + - target resource / surface + +2. **Assign failure class** + - use deterministic rule table before any model-based classifier + - allow only a bounded fallback classifier for unknown cases + +3. **Resolve routing scope** + - infer whether the cause is leaf-local, shared prerequisite, or global budget + +4. **Build dedupe key** + - `failureClass | resourceKey | surface | reasonCode` + - exclude volatile text (timestamps, raw stack traces, run ids) + +### Example derivations + +```text +stderr: "Telegram Web shows login required" +=> failureClass=auth_session_failure +=> resourceKey=telegram:web:primary +=> scope=shared_prerequisite +=> dedupeKey=auth_session_failure|telegram:web:primary|browser_relay|login_required +``` + +```text +stderr: "429 Too Many Requests from provider openai-codex" +=> failureClass=rate_limit_transient +=> resourceKey=provider:openai-codex +=> scope=leaf_local (single task) OR global_budget once repeated threshold trips +=> dedupeKey=rate_limit_transient|provider:openai-codex|dispatch|429 +``` + +```text +stderr: "proposal already published" +=> failureClass=invalid_state_transition +=> resourceKey=proposal:1234 +=> scope=shared_prerequisite if many leaves assume draft state +=> dedupeKey=invalid_state_transition|proposal:1234|mutation_path|already_published +``` + +--- + +## 4) Strategy-switching rules + +### Canonical strategies + +| Strategy | Use when | Result | +|---|---|---| +| `retry_same_path` | transient/local issue and retry budget remains | requeue same task with backoff | +| `switch_strategy` | same objective still valid but current path is irrational | reroute to alternate tool/channel/plan | +| `wake_or_create_blocker` | repeated shared blocker across leaves | create or wake one prerequisite task and pause dependents | +| `pause_for_review` | verification or safety-sensitive ambiguity | send to review / human confirmation | +| `global_throttle` | provider or budget exhaustion spans multiple tasks | deny/defer future dispatch until healthy | + +### Routing decision table + +| Failure class | First hit | Repeated hit | Exhausted state | +|---|---|---|---| +| `auth_session_failure` | retry once if evidence is weak; otherwise create blocker immediately | wake/create shared auth blocker; pause sibling leaves | mark dependency blocker and stop automation until prerequisite closes | +| `access_denial_antibot` | stop same-path retries; request alternate route | shared blocker + human review | quarantine target surface for cooldown window | +| `missing_input` | create/wake prerequisite or ask for input | collapse siblings onto same blocker | leave waiting on prerequisite, no further retries | +| `invalid_state_transition` | refresh state and re-evaluate | replan or close as superseded | no further retries on stale path | +| `verification_failure` | require explicit verification task / review | block execution lane on target | escalate to human with evidence bundle | +| `cost_exhaustion_repeated` | apply local defer/backoff | trigger provider/model/policy switch or dispatch gate deny | global throttle until healthy window returns | + +### Shared blocker promotion rule + +When all of the following hold, the executor promotes a blocker task: +1. `scope == shared_prerequisite` +2. same `dedupeKey` occurs on **>= 2 tasks** or **>= 2 attempts on one task** within the policy window +3. a blocker with the same `recommendedBlockerKey` is not already active + +Result: +- create or wake one blocker task +- attach `metadata.failureFingerprint.blockerTaskId` +- mark affected leaves as dependency-blocked / waiting on that blocker +- suppress additional same-fingerprint retries until blocker state changes + +### Replanning rule + +If a task hits `invalid_state_transition` or `verification_failure`, the next action should be a replan/verification step, not another leaf retry. The executor should either: +- create a child task for state refresh / verification, or +- send the task back to review with the fingerprint attached. + +--- + +## 5) Dynamic retry budget policy + +Retry budgets must be keyed by **task kind** and **failure class**, not one global `MAX_RETRIES`. + +### Policy table (recommended v1) + +| Task kind | Failure class | Auto retries | Backoff class | On exhaustion | +|---|---|---:|---|---| +| `execution` | `rate_limit_transient` | 2 | long | switch provider or defer via gate | +| `execution` | `auth_session_failure` | 1 | short | create/wake auth blocker | +| `execution` | `access_denial_antibot` | 0 | none | human unblock / alternate path | +| `execution` | `missing_input` | 0 | none | prerequisite task | +| `execution` | `invalid_state_transition` | 0 | none | refresh + replan | +| `execution` | `verification_failure` | 0 | none | review / verification task | +| `execution` | `tool_runtime_unavailable` | 1 | medium | reroute tool / pause | +| `aggregate` | `dependency_blocked` | 0 | none | wait for required children / blocker | +| `aggregate` | `missing_input` | 0 | none | request missing artifact coverage | +| `aggregate` | `tool_runtime_unavailable` | 1 | short | rerun reducer/export path | +| `aggregate` | `verification_failure` | 1 | short | review aggregate evidence | +| `artifact_only` | `tool_runtime_unavailable` | 2 | short | reroute agent/tool | +| `artifact_only` | `missing_input` | 0 | none | request source material | +| `artifact_only` | `invalid_state_transition` | 0 | none | usually close/supersede, not retry | +| `artifact_only` | `cost_exhaustion_repeated` | 1 | long | defer until budget recovers | +| `review` | `tool_runtime_unavailable` | 1 | short | reroute reviewer | +| `review` | `verification_failure` | 0 | none | escalate to human reviewer | + +### Why the policies differ + +- **Execution tasks** carry the most risk; most non-transient classes should not auto-retry. +- **Aggregate tasks** should rarely retry; repeated child blockers are usually dependency issues, not execution failures. +- **Artifact-only tasks** can tolerate slightly more retry on tooling failures because they do not directly mutate external state. + +### Budget accounting model + +Track two counters per task attempt window: +1. **`pathRetryCount`** — retries on the same strategy/path +2. **`strategySwitchCount`** — number of alternate paths already tried + +This prevents a task from escaping budget control by bouncing endlessly between weak alternatives. + +Recommended defaults: +- `execution`: `pathRetryCount <= 2`, `strategySwitchCount <= 1` +- `aggregate`: `pathRetryCount <= 1`, `strategySwitchCount <= 1` +- `artifact_only`: `pathRetryCount <= 2`, `strategySwitchCount <= 2` + +--- + +## 6) Proposed metadata contract + +Add the following metadata shape to task records: + +```json +{ + "metadata": { + "retryPolicy": { + "taskKind": "execution", + "pathRetryCount": 1, + "strategySwitchCount": 0, + "budgetWindow": "30m", + "lastBudgetDecision": "wake_or_create_blocker" + }, + "failureFingerprint": { + "fingerprintId": "ffp_01HV...", + "failureClass": "auth_session_failure", + "scope": "shared_prerequisite", + "dedupeKey": "auth_session_failure|telegram:web:primary|browser_relay|login_required", + "attemptsWindow": 2, + "firstSeenAt": "2026-03-13T09:00:00Z", + "lastSeenAt": "2026-03-13T09:07:00Z", + "recommendedStrategy": "wake_or_create_blocker", + "blockerTaskId": 1730 + }, + "blockedByFingerprint": true, + "sharedBlockerKey": "blocker:auth:telegram:web:primary" + } +} +``` + +### Executor outcome log extension + +Each `executor_outcomes.jsonl` record should append: +- `taskKind` +- `failureClass` +- `fingerprintDedupeKey` +- `routingDecision` +- `scope` +- `strategySwitched` (bool) +- `blockerTaskId` (if any) + +This is the minimum observability needed to prove sibling churn actually dropped after rollout. + +--- + +## 7) Shared blocker lifecycle + +### Blocker task creation contract + +When promoting shared blocker work, the system should create a task with: +- kind: `capability_probe` or `blocker_removal` +- title: deterministic and resource-based +- description: include fingerprint class, affected resource, and evidence bundle +- metadata: + - `blockerKey` + - `sourceFingerprint` + - `affectedTaskIds` + - `createdFromFailureRouter=true` + +### Wake vs create + +- **Wake existing blocker** when an active/pending blocker has the same `blockerKey`. +- **Create new blocker** only when no active blocker exists. + +### Leaf behavior while blocker active + +Affected leaves should not continue ordinary retry scheduling. Instead they should move to a dependency-held condition with: +- blocker task id +- blocker key +- fingerprint id +- timestamp of last routed match + +This is the mechanism that stops sibling retry burn. + +--- + +## 8) Observability and dashboards + +Required dashboard/reporting surfaces: +1. **Top repeated fingerprints** in the last 24h +2. **Shared blocker promotions** and number of leaves collapsed behind each blocker +3. **Retries avoided** after blocker promotion +4. **Failure-class breakdown by task kind** +5. **Global budget throttles** with provider/model linkage + +Key success metrics: +- drop in repeated identical failures per sibling set +- increase in blocker reuse rate +- reduction in tasks blocked only after max retries +- fewer execution tasks auto-retrying on verification failures + +--- + +## 9) Acceptance tests for the implementation task + +### A. Shared auth blocker +1. Two execution leaves fail with the same expired-session fingerprint. +2. System creates/wakes one auth blocker. +3. Sibling leaves stop consuming retries. +4. Both leaves reference the same blocker task id/key. + +### B. Anti-bot/access denial +1. First confirmed anti-bot fingerprint occurs on an execution task. +2. Executor does **not** requeue same path. +3. Alternate route or human unblock task is requested. + +### C. Missing shared input +1. Multiple tasks lack the same approval token / file. +2. One prerequisite task is created. +3. Additional failures attach to existing blocker instead of creating duplicates. + +### D. Invalid state transition +1. Execution task tries to mutate an already-finalized resource. +2. No same-path retry occurs. +3. Task routes to refresh/replan or closes as superseded. + +### E. Verification failure +1. Mutation step appears to succeed but verification check is inconclusive. +2. Task does not spend standard retry budget. +3. Task escalates to review/verification work. + +### F. Dynamic budgets by task kind +1. `artifact_only` task with tool outage gets >0 retries. +2. `aggregate` task with dependency blocker gets 0 same-path retries. +3. `execution` task with auth failure gets <=1 retry before blocker promotion. + +### G. Repeated cost exhaustion +1. Same provider quota exhaustion hits multiple tasks in window. +2. A global throttle / provider-switch path is activated. +3. Future dispatch is deferred rather than burning leaf retries. + +--- + +## 10) Implementation order recommendation + +1. Add fingerprint schema + deterministic classifier in executor. +2. Persist metadata and outcome-log extensions. +3. Add blocker promotion / wake semantics. +4. Add per-task-kind retry policy table. +5. Add dashboard aggregation + regression tests. + +This order ensures the system can first *see* repeated failure identity, then *route* it, then *enforce* differentiated budgets. + +--- + +## Bottom line + +T1712 should change TaskCore from **"every failure increments one retry counter"** to **"each failure class carries a scoped fingerprint and a bounded next strategy."** + +That is the key behavior change needed to stop repeated sibling churn, promote blocker-removal work when appropriate, and make retry policy depend on both **what failed** and **what kind of task failed**. diff --git a/docs/task-cli-spec.md b/docs/task-cli-spec.md index 661d03a..af2dfb8 100644 --- a/docs/task-cli-spec.md +++ b/docs/task-cli-spec.md @@ -510,6 +510,105 @@ Abort a pending decomposition session without committing. task decompose cancel ``` +#### `task plan` / `task decompose plan` — one-shot plan-based decomposition + +Submit a complete decomposition in one command from a structured plan text. +Uses the same `POST /tasks/:id/decompose` endpoint as a regular commit, so +children appear in the dashboard immediately. + +`task plan` is the low-friction shortcut for agents; `task decompose plan` is +its explicit decomposition-scoped equivalent. + +``` +task plan "" +task plan --file +task plan --stdin + [--strategy sequential|parallel] (default: sequential) + +# Equivalent forms +task decompose plan --items "" +task decompose plan --file +task decompose plan --stdin + [--strategy sequential|parallel] (default: sequential) +``` + +**Plan format:** + +```markdown +# Optional Section Heading +- [ ] Checklist step title (cost: 10, assignee: coder) +- [x] Already-checked step (cost: 5) +1. Ordered list step +- Plain bullet step + Indented lines (2+ spaces) become the child description. + Multiple continuation lines are joined with newlines. +``` + +Supported item types: checklist (`- [ ]`), ordered (`1.`), plain bullet (`-`). + +Headings (`#`, `##`, etc.) set a context prefix applied to all following items +until the next heading: `"Section: Step title"`. + +**Trailing metadata** (optional, at end of item line): + +``` +(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true) +``` + +Supported keys: `cost`, `assignee`, `reviewer`, `skip-analysis`. + +**Plan editing / propagation semantics (v1):** + +- The imported list order becomes the child task order. With the default + `sequential` strategy, that order also drives which child is ready first. +- Checkbox state in the source markdown is **not** treated as task completion. + Checked and unchecked items both import as child tasks; completion is tracked + by taskcore child state after materialization. +- Before materialization, edit the markdown and re-run `task plan`. +- After materialization, the child tasks become the durable source of truth + shown in taskcore/dashboard. If the remaining plan needs to change, create a + new decomposition version for the remaining work rather than mutating + completed child history in place. + +**Cost allocation:** + +- Items with explicit `cost` keep that value. +- Items without `cost` share the remaining budget evenly (integer-cent + arithmetic — no float drift). +- Errors clearly if explicit costs exceed the parent's remaining budget, or + if there is no budget left for uncosted items. + +**Example:** + +```markdown +# Infrastructure +- [ ] Provision database (cost: 15, assignee: infra) +- [ ] Configure networking (cost: 10, assignee: infra) + +# Application +- [ ] Implement API endpoints (assignee: coder, reviewer: lead) + Build REST endpoints for user CRUD operations. +- [ ] Write integration tests +``` + +``` +task plan --file plan.md --strategy sequential +``` + +**Output:** + +``` +--- Plan materialized for T42 --- + +Created 4 child tasks: + T101: Infrastructure: Provision database 15.00 + T102: Infrastructure: Configure networking 10.00 + T103: Application: Implement API endpoints 12.50 + T104: Application: Write integration tests 12.50 + +Parent task is now a coordinator. Children will execute the plan. +``` + ### 3.6 Review Workflow (guided, multi-step) When a task is in the `review` phase and claimed by a reviewer. diff --git a/middle/prompt.ts b/middle/prompt.ts index 99790ab..32e0c95 100644 --- a/middle/prompt.ts +++ b/middle/prompt.ts @@ -412,7 +412,8 @@ function appendDecompositionInstructions(sections: string[], task: Task, config: sections.push(""); sections.push("## How to Submit Your Decomposition"); sections.push(""); - sections.push("Use the incremental decompose CLI — it guides you step by step:"); + sections.push("Preferred when you already have a checklist: use the one-shot plan flow (`task plan --file plan.md` or `task decompose plan --stdin`)."); + sections.push("If you need to build children incrementally, use the step-by-step decompose flow below:"); sections.push(""); sections.push("```bash"); sections.push(`# Step 1: Start a decomposition session`);