diff --git a/.task b/.task
index 4f33323..d3a31c8 100644
--- a/.task
+++ b/.task
@@ -1,10 +1,10 @@
 {
-  "taskId": "1282",
+  "taskId": "2123",
   "phase": "execution",
-  "fenceToken": 3,
-  "sessionId": "c893aa20-a7b7-4112-9cc3-68c12a747bf1",
-  "journalPath": "/tmp/taskcore-worktrees/journal-T1282/tasks/T1282/",
-  "codeWorktree": "/tmp/taskcore-worktrees/code-T1282",
-  "claimedAt": 1773098423616,
+  "fenceToken": 10,
+  "sessionId": "7be3c6a7-358e-45c7-b5af-6c1c5a51b82e",
+  "journalPath": "/tmp/taskcore-worktrees/journal-T2123/tasks/T2123/",
+  "codeWorktree": "/tmp/taskcore-worktrees/code-T2123",
+  "claimedAt": 1773404837466,
   "reviewNotes": []
 }
diff --git a/core/cli/plan-parse.ts b/core/cli/plan-parse.ts
new file mode 100644
index 0000000..1da2167
--- /dev/null
+++ b/core/cli/plan-parse.ts
@@ -0,0 +1,258 @@
+/**
+ * plan-parse.ts — parse markdown-ish plans into child task specs and allocate costs.
+ *
+ * Supports:
+ *   - Checklist items:    `- [ ] title` or `- [x] title`
+ *   - Ordered items:      `1. title`
+ *   - Bullet items:       `- title`
+ *   - Headings:           `# Section` — prefix subsequent items' titles with "Section: "
+ *   - Continuation lines: indented (2+ spaces) lines after an item enrich its description
+ *   - Trailing meta:      `(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true)`
+ */
+
+export interface ParsedItem {
+  title: string;
+  description: string;
+  cost: number | undefined;
+  assignee: string | undefined;
+  reviewer: string | undefined;
+  skipAnalysis: boolean;
+}
+
+export interface AllocatedItem {
+  title: string;
+  description: string;
+  cost: number;
+  assignee: string | undefined;
+  reviewer: string | undefined;
+  skipAnalysis: boolean;
+}
+
+interface ItemMeta {
+  cost?: number;
+  assignee?: string;
+  reviewer?: string;
+  skipAnalysis?: boolean;
+}
+
+function extractMeta(raw: string): { baseTitle: string; meta: ItemMeta } {
+  // Match trailing parenthetical: `title (key: val, key: val)`
+  const parenMatch = raw.match(/^(.*?)\s*\(([^)]+)\)\s*$/);
+  if (!parenMatch) return { baseTitle: raw.trim(), meta: {} };
+
+  const baseTitle = parenMatch[1]!.trim();
+  const metaStr = parenMatch[2]!;
+  const meta: ItemMeta = {};
+
+  for (const pair of metaStr.split(",")) {
+    const colonIdx = pair.indexOf(":");
+    if (colonIdx < 0) continue;
+    const key = pair.slice(0, colonIdx).trim().toLowerCase();
+    const value = pair.slice(colonIdx + 1).trim();
+
+    switch (key) {
+      case "cost": {
+        const n = Number(value);
+        if (Number.isFinite(n) && n > 0) meta.cost = n;
+        break;
+      }
+      case "assignee":
+        if (value) meta.assignee = value;
+        break;
+      case "reviewer":
+        if (value) meta.reviewer = value;
+        break;
+      case "skip-analysis":
+        meta.skipAnalysis = value === "true" || value === "1" || value === "yes";
+        break;
+    }
+  }
+  return { baseTitle, meta };
+}
+
+interface PendingItem {
+  title: string;
+  rawTitle: string; // item text without heading prefix, used as default description
+  cost: number | undefined;
+  assignee: string | undefined;
+  reviewer: string | undefined;
+  skipAnalysis: boolean;
+}
+
+/**
+ * Parse a markdown-ish plan text into ParsedItem[].
+ *
+ * Item markers:
+ *   - `- [ ] title` / `- [x] title`  — checklist
+ *   - `1. title`                      — ordered list
+ *   - `- title`                       — plain bullet
+ *
+ * Headings (`# …`) set a context prefix applied to all following items until
+ * the next heading.
+ *
+ * Indented lines (2+ spaces) following an item are appended to its description.
+ *
+ * A trailing parenthetical `(key: value, …)` on any item line supplies metadata.
+ */
+export function parsePlan(text: string): ParsedItem[] {
+  const lines = text.split("\n");
+  const items: ParsedItem[] = [];
+  let headingContext = "";
+  let pending: PendingItem | null = null;
+  const descLines: string[] = [];
+
+  function flush(): void {
+    if (!pending) return;
+    const description = descLines.length > 0 ? descLines.join("\n") : pending.rawTitle;
+    items.push({
+      title: pending.title,
+      description,
+      cost: pending.cost,
+      assignee: pending.assignee,
+      reviewer: pending.reviewer,
+      skipAnalysis: pending.skipAnalysis,
+    });
+    pending = null;
+    descLines.length = 0;
+  }
+
+  for (const line of lines) {
+    const trimmed = line.trim();
+
+    // Blank lines — skip (don't flush; a blank between item and continuation is fine)
+    if (trimmed === "") continue;
+
+    // Heading: reset context prefix
+    const headingMatch = trimmed.match(/^#{1,6}\s+(.+)$/);
+    if (headingMatch) {
+      flush();
+      headingContext = headingMatch[1]!.trim();
+      continue;
+    }
+
+    // Checklist: `- [ ] title`  or  `- [x] title`  (any char inside brackets)
+    const checklistMatch = trimmed.match(/^-\s+\[[^\]]*\]\s+(.+)$/);
+    // Ordered: `1. title`  (must not have already matched checklist)
+    const orderedMatch = !checklistMatch ? trimmed.match(/^\d+\.\s+(.+)$/) : null;
+    // Bullet: `- title`  (only when neither of the above matched)
+    const bulletMatch = !checklistMatch && !orderedMatch ? trimmed.match(/^-\s+(.+)$/) : null;
+
+    const itemText = checklistMatch?.[1] ?? orderedMatch?.[1] ?? bulletMatch?.[1];
+
+    if (itemText !== undefined) {
+      flush();
+      const { baseTitle, meta } = extractMeta(itemText);
+      const prefix = headingContext ? `${headingContext}: ` : "";
+      pending = {
+        title: prefix + baseTitle,
+        rawTitle: baseTitle,
+        cost: meta.cost,
+        assignee: meta.assignee,
+        reviewer: meta.reviewer,
+        skipAnalysis: meta.skipAnalysis ?? false,
+      };
+      continue;
+    }
+
+    // Continuation: indented line (2+ spaces or a tab) while we have a pending item
+    if (pending && /^[ \t]{2,}/.test(line)) {
+      descLines.push(trimmed);
+      continue;
+    }
+
+    // Unrecognised line — flush pending (don't silently swallow item boundaries)
+    flush();
+  }
+
+  flush();
+  return items;
+}
+
+/**
+ * Allocate costs to items. Items with an explicit `cost` keep it; items without
+ * share the remaining budget evenly. Uses integer arithmetic (cents) to avoid
+ * float drift.
+ *
+ * Returns an error string on failure, or the allocated items on success.
+ */
+export function allocateCosts(
+  items: ParsedItem[],
+  budgetRemainingDollars: number,
+): { ok: true; items: AllocatedItem[] } | { ok: false; error: string } {
+  if (items.length === 0) {
+    return { ok: false, error: "plan contains no items" };
+  }
+
+  // Work in integer cents to avoid float drift
+  const budgetCents = Math.round(budgetRemainingDollars * 100);
+  if (budgetCents <= 0) {
+    return {
+      ok: false,
+      error: `no positive budget remaining (${budgetRemainingDollars.toFixed(2)})`,
+    };
+  }
+
+  let specifiedCents = 0;
+  let unspecifiedCount = 0;
+  for (const item of items) {
+    if (item.cost !== undefined) {
+      specifiedCents += Math.round(item.cost * 100);
+    } else {
+      unspecifiedCount++;
+    }
+  }
+
+  if (specifiedCents > budgetCents) {
+    return {
+      ok: false,
+      error:
+        `explicit costs (${(specifiedCents / 100).toFixed(2)}) exceed remaining budget` +
+        ` (${budgetRemainingDollars.toFixed(2)})`,
+    };
+  }
+
+  const remainingCents = budgetCents - specifiedCents;
+  let perItemCents = 0;
+  let extraCents = 0;
+
+  if (unspecifiedCount > 0) {
+    if (remainingCents <= 0) {
+      return {
+        ok: false,
+        error: `no budget remains for ${unspecifiedCount} item(s) without explicit cost`,
+      };
+    }
+    perItemCents = Math.floor(remainingCents / unspecifiedCount);
+    extraCents = remainingCents % unspecifiedCount;
+    if (perItemCents === 0) {
+      return {
+        ok: false,
+        error:
+          `remaining budget (${(remainingCents / 100).toFixed(2)}) too small to distribute` +
+          ` across ${unspecifiedCount} uncosted item(s)`,
+      };
+    }
+  }
+
+  // Distribute extra cents to the first items (deterministic, no float drift)
+  let extraGiven = 0;
+  const allocated: AllocatedItem[] = items.map((item) => {
+    let costCents: number;
+    if (item.cost !== undefined) {
+      costCents = Math.round(item.cost * 100);
+    } else {
+      costCents = perItemCents + (extraGiven < extraCents ? 1 : 0);
+      if (extraGiven < extraCents) extraGiven++;
+    }
+    return {
+      title: item.title,
+      description: item.description,
+      cost: costCents / 100,
+      assignee: item.assignee,
+      reviewer: item.reviewer,
+      skipAnalysis: item.skipAnalysis,
+    };
+  });
+
+  return { ok: true, items: allocated };
+}
diff --git a/core/cli/task.ts b/core/cli/task.ts
index 19192a2..9d71699 100644
--- a/core/cli/task.ts
+++ b/core/cli/task.ts
@@ -11,6 +11,7 @@ import {
   filterClaimableTasks,
   includeReviewQueueTask,
 } from "./claimability.js";
+import { allocateCosts, parsePlan } from "./plan-parse.js";
 
 // Handle EPIPE errors gracefully (e.g., when piping to head)
 process.stdout.on("error", (err: NodeJS.ErrnoException) => {
@@ -535,7 +536,9 @@ function phaseGuidance(phase: string, condition: string): string[] {
     case "decomposition.active":
       return [
         "You're decomposing this task. Next steps:",
-        "  task decompose start      # begin decomposition",
+        "  task plan --file plan.md  # materialize a whole checklist in one shot",
+        "  task decompose plan --stdin  # same flow, reading markdown from stdin",
+        "  task decompose start      # fallback: build children incrementally",
         "  task decompose add <...>  # add a child task",
         "  task decompose commit     # finalize children",
         "  task decompose cancel     # cancel decomposition",
@@ -768,7 +771,8 @@ function printHelp(): void {
     "  task update <message>",
     "  task analyze",
     "  task decide <execute|decompose>",
-    "  task decompose <start|add|commit|cancel> ...",
+    "  task plan <plan-text> | --file <path> | --stdin  # plan-based decomposition",
+    "  task decompose <start|add|commit|cancel|plan> ...",
     "  task review <read|note|approve|reject|request-changes> ...",
     "  task journal <read|write|write-file> ...",
     "  task worktree",
@@ -1520,10 +1524,12 @@ async function cmdClaim(argv: string[], jsonMode: boolean): Promise<void> {
     process.stdout.write("  task review reject --force             — reject permanently (prefer request-changes)\n");
     process.stdout.write("  task review request-changes \"notes\"   — request changes\n");
   } else if (phase === "decomposition") {
-    process.stdout.write("  task decompose start      — begin decomposition\n");
-    process.stdout.write("  task decompose add \"...\"  — add a child task\n");
-    process.stdout.write("  task decompose commit     — finalize children\n");
-    process.stdout.write("  task decompose cancel     — cancel decomposition\n");
+    process.stdout.write("  task plan --file plan.md   — commit a whole checklist in one shot\n");
+    process.stdout.write("  task decompose plan --stdin — same flow, reading markdown from stdin\n");
+    process.stdout.write("  task decompose start       — fallback: begin incremental decomposition\n");
+    process.stdout.write("  task decompose add \"...\"   — add a child task\n");
+    process.stdout.write("  task decompose commit      — finalize children\n");
+    process.stdout.write("  task decompose cancel      — cancel decomposition\n");
   } else {
     process.stdout.write("  task submit \"what you did\"   — when done\n");
     process.stdout.write("  task complete \"evidence\"     — done, no review needed\n");
@@ -2005,9 +2011,221 @@ async function cmdDecompose(argv: string[], jsonMode: boolean): Promise<void> {
       return;
     }
 
+    case "plan": {
+      const { flags } = parseFlags(args);
+
+      // Read plan text from --items, --file, or --stdin
+      let planText: string;
+      const itemsFlag = getFlagString(flags, "items");
+      const fileFlag = getFlagString(flags, "file");
+      const useStdin = getFlagBool(flags, "stdin");
+
+      if (itemsFlag !== undefined) {
+        planText = itemsFlag;
+      } else if (fileFlag !== undefined) {
+        if (!fs.existsSync(fileFlag)) {
+          throw new CliError(`File not found: ${fileFlag}`, 1);
+        }
+        planText = fs.readFileSync(fileFlag, "utf-8");
+      } else if (useStdin) {
+        planText = fs.readFileSync(0, "utf-8");
+      } else {
+        throw new CliError(
+          "Usage: task decompose plan --items <text> | --file <path> | --stdin\n" +
+            "       [--strategy sequential|parallel]",
+          1,
+        );
+      }
+
+      const strategyRaw = getFlagString(flags, "strategy") ?? "sequential";
+      if (strategyRaw !== "sequential" && strategyRaw !== "parallel") {
+        throw new CliError("--strategy must be 'sequential' or 'parallel'", 1);
+      }
+      const strategy = strategyRaw as "sequential" | "parallel";
+
+      // Parse the plan into items
+      const parsedItems = parsePlan(planText);
+      if (parsedItems.length === 0) {
+        throw new CliError(
+          "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.",
+          1,
+        );
+      }
+
+      // Fetch parent task to determine remaining budget
+      const task = await getTask(taskId);
+      const costObj = asRecord(task["cost"]);
+      const budgetRemaining = costObj
+        ? (asNumber(costObj["allocated"]) ?? 0)
+          - (asNumber(costObj["consumed"]) ?? 0)
+          - (asNumber(costObj["childAllocated"]) ?? 0)
+          + (asNumber(costObj["childRecovered"]) ?? 0)
+        : 0;
+
+      // Allocate costs (explicit + auto-distribution)
+      const allocation = allocateCosts(parsedItems, budgetRemaining);
+      if (!allocation.ok) {
+        throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1);
+      }
+
+      // Build children array for the one-shot endpoint
+      const children = allocation.items.map((item) => {
+        const child: Record<string, unknown> = {
+          title: item.title,
+          description: item.description,
+          costAllocation: item.cost,
+        };
+        if (item.assignee) child["assignee"] = item.assignee;
+        if (item.reviewer) child["reviewer"] = item.reviewer;
+        if (item.skipAnalysis) child["skipAnalysis"] = true;
+        return child;
+      });
+
+      const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, {
+        children,
+        coordinationMode: strategy,
+      });
+
+      if (jsonMode) {
+        process.stdout.write(JSON.stringify(response, null, 2) + "\n");
+        return;
+      }
+
+      process.stdout.write("--- Decomposition committed ---\n\n");
+      const responseChildren = asArray<unknown>(response["children"])
+        .map((c) => asRecord(c))
+        .filter((c): c is Record<string, unknown> => c !== null);
+      process.stdout.write(`Created ${responseChildren.length} children:\n`);
+      for (const child of responseChildren) {
+        process.stdout.write(
+          `  T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")}  ${formatMoney(child["costAllocation"])}\n`,
+        );
+      }
+
+      const planAgentId = process.env["TASKCORE_AGENT_ID"];
+      if (planAgentId) clearActiveTask(planAgentId);
+      return;
+    }
+
     default:
-      throw new CliError("Usage: task decompose <start|add|checkpoint|commit|cancel> ...", 1);
+      throw new CliError("Usage: task decompose <start|add|checkpoint|commit|cancel|plan> ...", 1);
+  }
+}
+
+/**
+ * `task plan` — top-level shortcut for plan-based decomposition.
+ *
+ * Usage:
+ *   task plan "- [ ] step one\n- [ ] step two"
+ *   task plan --file plan.md
+ *   task plan --stdin
+ *   task plan "- step one" --strategy parallel
+ *
+ * Equivalent to `task decompose plan` but more natural for agents.
+ * Accepts plan text as a positional arg (most common), --file, or --stdin.
+ */
+async function cmdPlan(argv: string[], jsonMode: boolean): Promise<void> {
+  requireAgentId();
+  const taskId = currentTaskId();
+  const { positionals, flags } = parseFlags(argv);
+
+  // Read plan text from positional arg, --file, or --stdin
+  let planText: string;
+  const fileFlag = getFlagString(flags, "file");
+  const useStdin = getFlagBool(flags, "stdin");
+
+  if (positionals.length > 0) {
+    planText = positionals.join(" ");
+  } else if (fileFlag !== undefined) {
+    if (!fs.existsSync(fileFlag)) {
+      throw new CliError(`File not found: ${fileFlag}`, 1);
+    }
+    planText = fs.readFileSync(fileFlag, "utf-8");
+  } else if (useStdin) {
+    planText = fs.readFileSync(0, "utf-8");
+  } else {
+    throw new CliError(
+      "Usage: task plan <plan-text> | --file <path> | --stdin\n" +
+        "       [--strategy sequential|parallel]\n" +
+        "\n" +
+        "Plan format:\n" +
+        "  - [ ] Step title (cost: 10, assignee: coder)\n" +
+        "  1. Ordered step\n" +
+        "  - Bullet step\n" +
+        "  # Section heading\n" +
+        "  Indented lines extend item descriptions.",
+      1,
+    );
+  }
+
+  const strategyRaw = getFlagString(flags, "strategy") ?? "sequential";
+  if (strategyRaw !== "sequential" && strategyRaw !== "parallel") {
+    throw new CliError("--strategy must be 'sequential' or 'parallel'", 1);
+  }
+  const strategy = strategyRaw as "sequential" | "parallel";
+
+  // Parse the plan
+  const parsedItems = parsePlan(planText);
+  if (parsedItems.length === 0) {
+    throw new CliError(
+      "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.",
+      1,
+    );
   }
+
+  // Fetch task to determine remaining budget
+  const task = await getTask(taskId);
+  const costObj = asRecord(task["cost"]);
+  const budgetRemaining = costObj
+    ? (asNumber(costObj["allocated"]) ?? 0)
+      - (asNumber(costObj["consumed"]) ?? 0)
+      - (asNumber(costObj["childAllocated"]) ?? 0)
+      + (asNumber(costObj["childRecovered"]) ?? 0)
+    : 0;
+
+  // Allocate costs
+  const allocation = allocateCosts(parsedItems, budgetRemaining);
+  if (!allocation.ok) {
+    throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1);
+  }
+
+  // Build children array
+  const children = allocation.items.map((item) => {
+    const child: Record<string, unknown> = {
+      title: item.title,
+      description: item.description,
+      costAllocation: item.cost,
+    };
+    if (item.assignee) child["assignee"] = item.assignee;
+    if (item.reviewer) child["reviewer"] = item.reviewer;
+    if (item.skipAnalysis) child["skipAnalysis"] = true;
+    return child;
+  });
+
+  const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, {
+    children,
+    coordinationMode: strategy,
+  });
+
+  if (jsonMode) {
+    process.stdout.write(JSON.stringify(response, null, 2) + "\n");
+    return;
+  }
+
+  process.stdout.write(`--- Plan materialized for T${taskId} ---\n\n`);
+  const responseChildren = asArray<unknown>(response["children"])
+    .map((c) => asRecord(c))
+    .filter((c): c is Record<string, unknown> => c !== null);
+  process.stdout.write(`Created ${responseChildren.length} child tasks:\n`);
+  for (const child of responseChildren) {
+    process.stdout.write(
+      `  T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")}  ${formatMoney(child["costAllocation"])}\n`,
+    );
+  }
+  process.stdout.write("\nParent task is now a coordinator. Children will execute the plan.\n");
+
+  const planAgentId = process.env["TASKCORE_AGENT_ID"];
+  if (planAgentId) clearActiveTask(planAgentId);
 }
 
 function ensureContextWithTaskId(taskId: string): TaskContext {
@@ -2606,6 +2824,27 @@ const subcommandHelp: Record<string, string> = {
     "  decompose   Task will be split into subtasks",
   ].join("\n"),
 
+  plan: [
+    "task plan <plan-text> | --file <path> | --stdin",
+    "",
+    "One-shot plan-based decomposition. Parses a markdown plan into",
+    "child tasks and materializes them under the current task.",
+    "Equivalent to `task decompose plan` but accepts plan as a positional arg.",
+    "",
+    "Options:",
+    "  --file <path>                       Read plan from file",
+    "  --stdin                             Read plan from stdin",
+    "  --strategy sequential|parallel      Coordination mode (default: sequential)",
+    "",
+    "Plan format:",
+    "  - [ ] Step title (cost: 10, assignee: coder)",
+    "  1. Ordered step",
+    "  - Bullet step",
+    "  # Section heading   — prefixes following items with 'Section: '",
+    "  Indented lines (2+ spaces) extend the item description.",
+    "  Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis",
+  ].join("\n"),
+
   decompose: [
     "task decompose <subcommand> ...",
     "",
@@ -2620,6 +2859,19 @@ const subcommandHelp: Record<string, string> = {
     "    --skip-analysis                     Skip analysis phase for child",
     "  commit <strategy>                     Commit the decomposition",
     "  cancel                                Cancel pending decomposition",
+    "  plan                                  One-shot plan-based decomposition",
+    "    --items <markdown>                  Inline plan text",
+    "    --file <path>                       Read plan from file",
+    "    --stdin                             Read plan from stdin",
+    "    --strategy sequential|parallel      Coordination mode (default: sequential)",
+    "",
+    "Plan format (for 'plan' subcommand):",
+    "  - [ ] Step title (cost: 10, assignee: coder)",
+    "  1. Ordered step",
+    "  - Bullet step",
+    "  # Section heading   — prefixes following items with 'Section: '",
+    "  Indented lines (2+ spaces) extend the item description.",
+    "  Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis",
   ].join("\n"),
 
   review: [
@@ -2799,6 +3051,9 @@ async function run(argv: string[]): Promise<void> {
     case "decide":
       await cmdDecide(rest, jsonMode);
       return;
+    case "plan":
+      await cmdPlan(rest, jsonMode);
+      return;
     case "decompose":
       await cmdDecompose(rest, jsonMode);
       return;
diff --git a/core/clock.ts b/core/clock.ts
index f77ff86..4809ff5 100644
--- a/core/clock.ts
+++ b/core/clock.ts
@@ -8,6 +8,7 @@ import {
   type Event,
   type LeaseExpired,
   type PhaseTransition,
+  type RetryScheduled,
   type SystemState,
   type Task,
   type TaskExhausted,
@@ -18,6 +19,14 @@ function backoffDue(task: Task, now: number): boolean {
   return task.condition === "retryWait" && task.retryAfter !== null && task.retryAfter <= now;
 }
 
+function hasActiveAgent(task: Task): boolean {
+  return typeof task.leasedTo === "string" && task.leasedTo.trim().length > 0;
+}
+
+function malformedActiveLease(task: Task): boolean {
+  return task.condition === "active" && (!hasActiveAgent(task) || task.leaseExpiresAt === null);
+}
+
 function childrenCompleteReady(state: SystemState, task: Task): { allTerminal: boolean; anyDone: boolean } {
   if (task.children.length === 0) {
     return { allTerminal: false, anyDone: false };
@@ -39,6 +48,7 @@ function childrenCompleteReady(state: SystemState, task: Task): { allTerminal: b
 
 export class CoreClock {
   private readonly sourceId: string;
+  private static readonly LEAKED_LEASE_BACKOFF_MS = 1_000;
 
   public constructor(sourceId = "core-clock") {
     this.sourceId = sourceId;
@@ -103,11 +113,22 @@ export class CoreClock {
         }
       }
 
-      if (
-        task.condition === "active" &&
-        task.leaseExpiresAt !== null &&
-        task.leaseExpiresAt <= now
-      ) {
+      if (malformedActiveLease(task)) {
+        const retryScheduled: RetryScheduled = {
+          type: "RetryScheduled",
+          taskId: task.id,
+          ts: now,
+          fenceToken: task.currentFenceToken,
+          reason: "orphaned_on_restart",
+          retryAfter: now + CoreClock.LEAKED_LEASE_BACKOFF_MS,
+          phase: task.phase,
+          attemptNumber: Math.max(1, task.attempts[task.phase].used),
+        };
+        due.push(retryScheduled);
+        continue;
+      }
+
+      if (task.condition === "active" && task.leaseExpiresAt !== null && task.leaseExpiresAt <= now) {
         const leaseExpired: LeaseExpired = {
           type: "LeaseExpired",
           taskId: task.id,
diff --git a/core/test/cli-plan.test.ts b/core/test/cli-plan.test.ts
new file mode 100644
index 0000000..4d50269
--- /dev/null
+++ b/core/test/cli-plan.test.ts
@@ -0,0 +1,221 @@
+import * as http from "node:http";
+import { spawnSync } from "node:child_process";
+import { beforeEach, afterEach, test } from "node:test";
+import * as assert from "node:assert/strict";
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { OrchestrationCore } from "../index.js";
+import { createHttpServer } from "../../middle/http.js";
+import { loadConfig, type Config } from "../../middle/config.js";
+import { initJournalRepo } from "../../middle/journal.js";
+
+let server: http.Server;
+let core: OrchestrationCore;
+let tmpDir: string;
+let port: number;
+let config: Config;
+
+const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../..");
+
+function request(
+  method: string,
+  urlPath: string,
+  body?: unknown,
+): Promise<{ status: number; body: unknown }> {
+  return new Promise((resolve, reject) => {
+    const data = body ? JSON.stringify(body) : undefined;
+    const req = http.request(
+      {
+        hostname: "127.0.0.1",
+        port,
+        path: urlPath,
+        method,
+        headers: data
+          ? {
+            "Content-Type": "application/json",
+            "Content-Length": Buffer.byteLength(data),
+          }
+          : {},
+      },
+      (res) => {
+        const chunks: Buffer[] = [];
+        res.on("data", (chunk: Buffer) => chunks.push(chunk));
+        res.on("end", () => {
+          const raw = Buffer.concat(chunks).toString("utf-8");
+          try {
+            resolve({ status: res.statusCode ?? 500, body: JSON.parse(raw) });
+          } catch {
+            resolve({ status: res.statusCode ?? 500, body: raw });
+          }
+        });
+      },
+    );
+    req.on("error", reject);
+    if (data) req.write(data);
+    req.end();
+  });
+}
+
+async function setup(): Promise<void> {
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-cli-plan-"));
+  const dbPath = path.join(tmpDir, "test.db");
+  port = 19800 + Math.floor(Math.random() * 1000);
+  const journalRepoPath = path.join(tmpDir, "journal");
+  const worktreeBaseDir = path.join(tmpDir, "worktrees");
+  const workspaceDir = path.join(tmpDir, "workspace");
+  const agentRegistry = path.join(tmpDir, "registry.json");
+
+  fs.mkdirSync(workspaceDir, { recursive: true });
+  fs.mkdirSync(path.join(workspaceDir, "data"), { recursive: true });
+  fs.writeFileSync(agentRegistry, JSON.stringify({
+    agents: [
+      { id: "coder", assignable: true, reviewer: true, consulted: true },
+      { id: "overseer", assignable: true, reviewer: true, consulted: true },
+    ],
+  }, null, 2));
+  initJournalRepo(journalRepoPath);
+
+  core = new OrchestrationCore({
+    dbPath,
+    invariantChecks: true,
+    snapshotEvery: 50,
+  });
+
+  config = {
+    ...loadConfig(),
+    port,
+    dbPath,
+    agentRegistry,
+    workspaceDir,
+    journalRepoPath,
+    worktreeBaseDir,
+    runtimeFile: "",
+  };
+
+  server = createHttpServer(core, config);
+  await new Promise<void>((resolve) => {
+    server.listen(port, "127.0.0.1", resolve);
+  });
+}
+
+async function teardown(): Promise<void> {
+  await new Promise<void>((resolve) => server.close(() => resolve()));
+  core.close();
+  fs.rmSync(tmpDir, { recursive: true, force: true });
+}
+
+beforeEach(setup);
+afterEach(teardown);
+
+test("task plan materializes a markdown checklist into child tasks end-to-end", async () => {
+  const createRes = await request("POST", "/tasks", {
+    title: "Plan import parent",
+    description: "Verify task plan end-to-end",
+    assignee: "coder",
+    costBudget: 25,
+  });
+  assert.equal(createRes.status, 201);
+
+  const claimRes = await request("POST", "/tasks/1/claim", {
+    agentId: "coder",
+    source: "test",
+  });
+  assert.equal(claimRes.status, 200);
+
+  const planPath = path.join(tmpDir, "plan.md");
+  fs.writeFileSync(planPath, [
+    "# Planning",
+    "- [ ] Parse markdown checklist (cost: 5, assignee: coder)",
+    "  Capture headings and metadata.",
+    "- [ ] Materialize children (reviewer: overseer)",
+    "  Verify the parent/child shape in taskcore/dashboard.",
+  ].join("\n"));
+
+  const cli = spawnSync(
+    process.execPath,
+    ["--import", "tsx", "core/cli/task.ts", "plan", "--file", planPath],
+    {
+      cwd: repoRoot,
+      env: {
+        ...process.env,
+        ORCHESTRATOR_PORT: String(port),
+        TASKCORE_AGENT_ID: "coder",
+        TASK_ID: "1",
+      },
+      encoding: "utf-8",
+    },
+  );
+
+  assert.equal(cli.status, 0, `stdout:\n${cli.stdout}\n\nstderr:\n${cli.stderr}`);
+  assert.match(cli.stdout, /Plan materialized for T1/);
+  assert.match(cli.stdout, /Created 2 child tasks/);
+
+  const parentRes = await request("GET", "/tasks/1");
+  assert.equal(parentRes.status, 200);
+  const parent = (parentRes.body as {
+    task: {
+      phase: string;
+      condition: string;
+      children: string[];
+      coordination?: { mode?: string };
+    };
+  }).task;
+
+  assert.equal(parent.phase, "analysis");
+  assert.equal(parent.condition, "waiting");
+  assert.deepEqual(parent.children, ["2", "3"]);
+  assert.equal(parent.coordination?.mode, "sequential_children");
+
+  const childrenRes = await request("GET", "/tasks?parentId=1&full=true");
+  assert.equal(childrenRes.status, 200);
+  const children = (childrenRes.body as {
+    tasks: Array<{
+      id: string;
+      title: string;
+      description: string;
+      phase: string;
+      condition: string;
+      metadata: { assignee?: string; reviewer?: string };
+      cost: { allocated: number };
+    }>;
+  }).tasks;
+
+  assert.equal(children.length, 2);
+  assert.deepEqual(
+    children.map((child) => ({
+      id: child.id,
+      title: child.title,
+      description: child.description,
+      phase: child.phase,
+      condition: child.condition,
+      assignee: child.metadata.assignee,
+      reviewer: child.metadata.reviewer,
+      allocated: child.cost.allocated,
+    })),
+    [
+      {
+        id: "2",
+        title: "Planning: Parse markdown checklist",
+        description: "Capture headings and metadata.",
+        phase: "analysis",
+        condition: "ready",
+        assignee: "coder",
+        reviewer: undefined,
+        allocated: 5,
+      },
+      {
+        id: "3",
+        title: "Planning: Materialize children",
+        description: "Verify the parent/child shape in taskcore/dashboard.",
+        phase: "analysis",
+        condition: "waiting",
+        assignee: undefined,
+        reviewer: "overseer",
+        allocated: 20,
+      },
+    ],
+  );
+});
diff --git a/core/test/plan-parse.test.ts b/core/test/plan-parse.test.ts
new file mode 100644
index 0000000..e1e4543
--- /dev/null
+++ b/core/test/plan-parse.test.ts
@@ -0,0 +1,269 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { allocateCosts, parsePlan } from "../cli/plan-parse.js";
+
+// ---------------------------------------------------------------------------
+// Parser tests
+// ---------------------------------------------------------------------------
+
+test("parsePlan: checklist items", () => {
+  const items = parsePlan("- [ ] First step\n- [ ] Second step");
+  assert.equal(items.length, 2);
+  assert.equal(items[0]!.title, "First step");
+  assert.equal(items[1]!.title, "Second step");
+});
+
+test("parsePlan: checked and unchecked checklist both parse", () => {
+  const items = parsePlan("- [ ] Pending\n- [x] Done\n- [X] Also done");
+  assert.equal(items.length, 3);
+  assert.equal(items[0]!.title, "Pending");
+  assert.equal(items[1]!.title, "Done");
+  assert.equal(items[2]!.title, "Also done");
+});
+
+test("parsePlan: ordered list items", () => {
+  const items = parsePlan("1. Alpha\n2. Beta\n3. Gamma");
+  assert.equal(items.length, 3);
+  assert.equal(items[0]!.title, "Alpha");
+  assert.equal(items[1]!.title, "Beta");
+  assert.equal(items[2]!.title, "Gamma");
+});
+
+test("parsePlan: plain bullet items", () => {
+  const items = parsePlan("- Do thing A\n- Do thing B");
+  assert.equal(items.length, 2);
+  assert.equal(items[0]!.title, "Do thing A");
+  assert.equal(items[1]!.title, "Do thing B");
+});
+
+test("parsePlan: headings prefix subsequent items", () => {
+  const items = parsePlan("# Setup\n- Install deps\n# Cleanup\n- Remove temp files");
+  assert.equal(items.length, 2);
+  assert.equal(items[0]!.title, "Setup: Install deps");
+  assert.equal(items[1]!.title, "Cleanup: Remove temp files");
+});
+
+test("parsePlan: heading context resets on new heading", () => {
+  const items = parsePlan("# Phase 1\n- Item A\n# Phase 2\n- Item B");
+  assert.equal(items[0]!.title, "Phase 1: Item A");
+  assert.equal(items[1]!.title, "Phase 2: Item B");
+});
+
+test("parsePlan: no heading means no prefix", () => {
+  const items = parsePlan("- Plain item");
+  assert.equal(items[0]!.title, "Plain item");
+});
+
+test("parsePlan: indented continuation lines become description", () => {
+  const items = parsePlan(
+    "- Do the thing\n  With extra context here\n  And more details",
+  );
+  assert.equal(items.length, 1);
+  assert.equal(items[0]!.title, "Do the thing");
+  assert.equal(items[0]!.description, "With extra context here\nAnd more details");
+});
+
+test("parsePlan: description defaults to item text when no continuations", () => {
+  const items = parsePlan("- Simple item");
+  assert.equal(items[0]!.description, "Simple item");
+});
+
+test("parsePlan: blank lines between items are ignored", () => {
+  const items = parsePlan("- First\n\n- Second\n\n- Third");
+  assert.equal(items.length, 3);
+});
+
+test("parsePlan: unrecognised prose flushes the pending item", () => {
+  const items = parsePlan([
+    "- First step",
+    "This paragraph should not become a continuation line",
+    "- Second step",
+  ].join("\n"));
+
+  assert.equal(items.length, 2);
+  assert.equal(items[0]!.title, "First step");
+  assert.equal(items[0]!.description, "First step");
+  assert.equal(items[1]!.title, "Second step");
+});
+
+test("parsePlan: inline cost metadata", () => {
+  const items = parsePlan("- Do something (cost: 15)");
+  assert.equal(items[0]!.title, "Do something");
+  assert.equal(items[0]!.cost, 15);
+});
+
+test("parsePlan: inline assignee and reviewer", () => {
+  const items = parsePlan("- Do something (assignee: coder, reviewer: overseer)");
+  assert.equal(items[0]!.assignee, "coder");
+  assert.equal(items[0]!.reviewer, "overseer");
+});
+
+test("parsePlan: skip-analysis flag true", () => {
+  const items = parsePlan("- Trivial task (skip-analysis: true)");
+  assert.equal(items[0]!.skipAnalysis, true);
+});
+
+test("parsePlan: skip-analysis flag false", () => {
+  const items = parsePlan("- Normal task (skip-analysis: false)");
+  assert.equal(items[0]!.skipAnalysis, false);
+});
+
+test("parsePlan: combined metadata", () => {
+  const items = parsePlan(
+    "- Full item (cost: 20, assignee: coder, reviewer: lead, skip-analysis: true)",
+  );
+  assert.equal(items[0]!.title, "Full item");
+  assert.equal(items[0]!.cost, 20);
+  assert.equal(items[0]!.assignee, "coder");
+  assert.equal(items[0]!.reviewer, "lead");
+  assert.equal(items[0]!.skipAnalysis, true);
+});
+
+test("parsePlan: metadata stripped from title", () => {
+  const items = parsePlan("- Fix the bug (cost: 5)");
+  assert.equal(items[0]!.title, "Fix the bug");
+  assert.equal(items[0]!.cost, 5);
+});
+
+test("parsePlan: heading prefix not applied to description", () => {
+  const items = parsePlan("# Phase\n- Task title");
+  assert.equal(items[0]!.title, "Phase: Task title");
+  assert.equal(items[0]!.description, "Task title"); // description has no heading prefix
+});
+
+test("parsePlan: mixed formats in one plan", () => {
+  const plan = [
+    "# Bootstrap",
+    "- [ ] Install dependencies (cost: 5)",
+    "1. Configure environment (cost: 10, assignee: infra)",
+    "# Feature",
+    "- Build the thing",
+    "  Detailed instructions here",
+  ].join("\n");
+
+  const items = parsePlan(plan);
+  assert.equal(items.length, 3);
+  assert.equal(items[0]!.title, "Bootstrap: Install dependencies");
+  assert.equal(items[0]!.cost, 5);
+  assert.equal(items[1]!.title, "Bootstrap: Configure environment");
+  assert.equal(items[1]!.cost, 10);
+  assert.equal(items[1]!.assignee, "infra");
+  assert.equal(items[2]!.title, "Feature: Build the thing");
+  assert.equal(items[2]!.description, "Detailed instructions here");
+});
+
+// ---------------------------------------------------------------------------
+// Cost allocation tests
+// ---------------------------------------------------------------------------
+
+test("allocateCosts: errors on empty items", () => {
+  const result = allocateCosts([], 100);
+  assert.equal(result.ok, false);
+});
+
+test("allocateCosts: errors on zero budget", () => {
+  const items = parsePlan("- Step");
+  const result = allocateCosts(items, 0);
+  assert.equal(result.ok, false);
+  if (result.ok) return;
+  assert.match(result.error, /no positive budget/);
+});
+
+test("allocateCosts: errors on negative budget", () => {
+  const items = parsePlan("- Step");
+  const result = allocateCosts(items, -5);
+  assert.equal(result.ok, false);
+});
+
+test("allocateCosts: uses explicit costs as-is", () => {
+  const items = parsePlan("- Step A (cost: 10)\n- Step B (cost: 20)");
+  const result = allocateCosts(items, 100);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  assert.equal(result.items[0]!.cost, 10);
+  assert.equal(result.items[1]!.cost, 20);
+});
+
+test("allocateCosts: distributes remaining budget evenly", () => {
+  const items = parsePlan("- A\n- B\n- C");
+  const result = allocateCosts(items, 30);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  assert.equal(result.items[0]!.cost, 10);
+  assert.equal(result.items[1]!.cost, 10);
+  assert.equal(result.items[2]!.cost, 10);
+});
+
+test("allocateCosts: distributes remainder cents to first items", () => {
+  // $10.01 across 3 items → 3.34, 3.34, 3.33 (1 extra cent to first two)
+  const items = parsePlan("- A\n- B\n- C");
+  const result = allocateCosts(items, 10.01);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0);
+  assert.equal(totalCents, 1001);
+  assert.equal(result.items[0]!.cost, 3.34);
+  assert.equal(result.items[1]!.cost, 3.34);
+  assert.equal(result.items[2]!.cost, 3.33);
+});
+
+test("allocateCosts: mixes explicit and auto-distributed costs", () => {
+  const items = parsePlan("- A (cost: 10)\n- B\n- C");
+  const result = allocateCosts(items, 30);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  assert.equal(result.items[0]!.cost, 10); // explicit
+  assert.equal(result.items[1]!.cost, 10); // (30 - 10) / 2
+  assert.equal(result.items[2]!.cost, 10);
+});
+
+test("allocateCosts: total equals budget when all auto", () => {
+  const items = parsePlan("- A\n- B");
+  const result = allocateCosts(items, 50);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0);
+  assert.equal(totalCents, 5000);
+});
+
+test("allocateCosts: errors if explicit costs exceed budget", () => {
+  const items = parsePlan("- A (cost: 60)\n- B (cost: 50)");
+  const result = allocateCosts(items, 100);
+  assert.equal(result.ok, false);
+  if (result.ok) return;
+  assert.match(result.error, /exceed/);
+});
+
+test("allocateCosts: errors if no budget left for unspecified items", () => {
+  const items = parsePlan("- A (cost: 100)\n- B");
+  const result = allocateCosts(items, 100);
+  assert.equal(result.ok, false);
+  if (result.ok) return;
+  assert.match(result.error, /no budget/);
+});
+
+test("allocateCosts: preserves assignee and reviewer on items", () => {
+  const items = parsePlan("- Task (cost: 10, assignee: coder, reviewer: lead)");
+  const result = allocateCosts(items, 100);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  assert.equal(result.items[0]!.assignee, "coder");
+  assert.equal(result.items[0]!.reviewer, "lead");
+});
+
+test("allocateCosts: preserves skipAnalysis flag", () => {
+  const items = parsePlan("- Task (cost: 10, skip-analysis: true)");
+  const result = allocateCosts(items, 100);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  assert.equal(result.items[0]!.skipAnalysis, true);
+});
+
+test("allocateCosts: single item gets full budget when no cost specified", () => {
+  const items = parsePlan("- Single task");
+  const result = allocateCosts(items, 25);
+  assert.equal(result.ok, true);
+  if (!result.ok) return;
+  assert.equal(result.items[0]!.cost, 25);
+});
diff --git a/core/test/scenarios.test.ts b/core/test/scenarios.test.ts
index 1ac82a1..a585171 100644
--- a/core/test/scenarios.test.ts
+++ b/core/test/scenarios.test.ts
@@ -1405,6 +1405,49 @@ test("Scenario S: LeaseExpired auto-emission transitions leased task to retryWai
   assert.equal(state.tasks.T1800?.leasedTo, null);
 });
 
+test("Scenario S2: malformed active task with missing lease is retried by the clock", () => {
+  let state = createInitialState();
+  state = mustReduce(state, createTask("T1801", 1));
+  state = mustReduce(state, lease("T1801", 2, 1, "analysis", "analyst", "a-1801"));
+
+  state.tasks.T1801!.leaseExpiresAt = null;
+
+  const clock = new CoreClock();
+  const due = clock.collectDueEvents(state, 10_000);
+  const retryScheduled = due.find((event) => event.type === "RetryScheduled" && event.taskId === "T1801");
+  assert.ok(retryScheduled);
+  if (!retryScheduled || retryScheduled.type !== "RetryScheduled") {
+    assert.fail("expected RetryScheduled");
+  }
+  assert.equal(retryScheduled.reason, "orphaned_on_restart");
+
+  state = mustReduce(state, retryScheduled);
+  assert.equal(state.tasks.T1801?.condition, "retryWait");
+  assert.equal(state.tasks.T1801?.leasedTo, null);
+  assert.equal(state.tasks.T1801?.leaseExpiresAt, null);
+});
+
+test("Scenario S3: malformed active task with empty agent is retried by the clock", () => {
+  let state = createInitialState();
+  state = mustReduce(state, createTask("T1802", 1));
+  state = mustReduce(state, lease("T1802", 2, 1, "analysis", "analyst", "a-1802"));
+
+  state.tasks.T1802!.leasedTo = "";
+
+  const clock = new CoreClock();
+  const due = clock.collectDueEvents(state, 10_000);
+  const retryScheduled = due.find((event) => event.type === "RetryScheduled" && event.taskId === "T1802");
+  assert.ok(retryScheduled);
+  if (!retryScheduled || retryScheduled.type !== "RetryScheduled") {
+    assert.fail("expected RetryScheduled");
+  }
+  assert.equal(retryScheduled.reason, "orphaned_on_restart");
+
+  state = mustReduce(state, retryScheduled);
+  assert.equal(state.tasks.T1802?.condition, "retryWait");
+  assert.equal(state.tasks.T1802?.leasedTo, null);
+});
+
 test("Scenario T: WaitResolved(block) blocks task and propagates summary to parent", () => {
   let state = createInitialState();
   state = mustReduce(state, createTask("T1900", 1));
diff --git a/core/test/validator.test.ts b/core/test/validator.test.ts
index eb27d25..4638cc9 100644
--- a/core/test/validator.test.ts
+++ b/core/test/validator.test.ts
@@ -63,6 +63,34 @@ test("validator rejects non-monotonic fence token", () => {
   assert.equal(error.code, "fence_not_monotonic");
 });
 
+test("validator rejects LeaseGranted with empty agent id", () => {
+  const state = bootstrapState();
+
+  const invalidLease: Event = {
+    type: "LeaseGranted",
+    taskId: "T10",
+    ts: 2,
+    fenceToken: 1,
+    agentId: "   ",
+    phase: "analysis",
+    leaseTimeout: 60_000,
+    sessionId: "sess-1",
+    sessionType: "fresh",
+    contextBudget: 512,
+    agentContext: {
+      sessionId: "sess-1",
+      agentId: "   ",
+      memoryRef: null,
+      contextTokens: null,
+      modelId: "test",
+    },
+  };
+
+  const error = validateEvent(state, invalidLease);
+  assert.ok(error);
+  assert.equal(error.code, "invalid_agent_id");
+});
+
 test("validator enforces failure summary on TaskFailed", () => {
   const state = bootstrapState();
 
diff --git a/core/validator.ts b/core/validator.ts
index 5c6d4fd..eb7ec6f 100644
--- a/core/validator.ts
+++ b/core/validator.ts
@@ -321,6 +321,14 @@ function validateSessionPolicy(
   return null;
 }
 
+function validateLeaseAgent(event: Extract<Event, { type: "LeaseGranted" }>): ValidationError | null {
+  if (!nonEmptyText(event.agentId)) {
+    return mkError(event, "invalid_agent_id", "LeaseGranted.agentId must be non-empty.");
+  }
+
+  return null;
+}
+
 function validateWaitAction(event: Extract<Event, { type: "WaitResolved" }>): ValidationError | null {
   if (event.action === "block" && !validFailureSummary(event.summary)) {
     return mkError(
@@ -493,6 +501,11 @@ export function validateEvent(state: SystemState, event: Event): ValidationError
         return mkError(event, "invalid_context_budget", "Context budget must be a positive integer.");
       }
 
+      const agentError = validateLeaseAgent(event);
+      if (agentError) {
+        return agentError;
+      }
+
       const attempt = task.attempts[event.phase];
       if (attempt.used >= attempt.max) {
         return mkError(event, "attempt_budget_exhausted", "Attempt budget exhausted for current phase.", {
diff --git a/docs/ops/t1710-capability-first-orchestration-hardening-plan.md b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md
new file mode 100644
index 0000000..699d221
--- /dev/null
+++ b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md
@@ -0,0 +1,159 @@
+# T1710 — Capability-First Orchestration Hardening
+
+## Executive recommendation
+
+This program is **too large and cross-cutting to execute as a single task**. It should be decomposed into ordered child tasks with one parent artifact that fixes the architecture, sequencing, and acceptance criteria.
+
+The failure pattern is not "one bug". It is a systems problem caused by trying to decompose and execute uncertain missions **before** the system knows:
+- what capabilities are actually available,
+- which prerequisites are missing,
+- which steps are reversible vs. irreversible,
+- how partial progress should be preserved,
+- when repeated failure should trigger a strategy change instead of more retries.
+
+## Problem framing
+
+The target workflow class has five characteristics:
+1. **High uncertainty at start** — entity matching, environment state, or account status may be unknown.
+2. **Browser-mediated execution** — progress depends on live UI state, auth, and fragile selectors.
+3. **Partially irreversible actions** — clicks, submissions, trades, messages, or confirmations may have consequences.
+4. **Mixed research + execution** — discovery work is often bundled with deterministic action steps.
+5. **Infrastructure noise** — browser relay, auth state, tool health, and mutation-path failures create false task churn.
+
+If the orchestrator treats these as ordinary deterministic tasks, it creates the same failure loop:
+- decompose too early,
+- assign execution before readiness,
+- lose partial findings in review/aggregate handoffs,
+- retry the same failing path,
+- escalate risk near irreversible steps.
+
+## Strategic design principles
+
+### 1. Capability-first before decomposition
+Before generating child tasks, the system should produce a **mission capability snapshot**:
+- available tools and runtimes,
+- authenticated systems/accounts,
+- browser availability and attachment state,
+- permission constraints,
+- verification channels,
+- human-approval requirements,
+- known blockers.
+
+If readiness is low, the system should create **prerequisite tasks** first, not execution tasks.
+
+### 2. Separate discovery from deterministic execution
+A mission should not begin with an execution plan when the main unknown is still identification, feasibility, auth, or state verification.
+
+Use two lanes:
+- **Discovery lane**: identify entities, inspect environment, map options, and gather evidence.
+- **Execution lane**: perform deterministic, validated steps only after inputs are stable.
+
+This preserves operator clarity and reduces bogus "execution failures" that are really unresolved discovery problems.
+
+### 3. Preserve partial progress as first-class artifacts
+When a child task uncovers verified facts but cannot finish the end-to-end mission, that output must survive reviews, retries, and replanning.
+
+Required artifact types:
+- capability snapshots,
+- matched entities / rejected candidates,
+- prerequisite checklist state,
+- evidence bundles,
+- environment fingerprints,
+- execution-ready plans,
+- approval packets for irreversible actions.
+
+The parent should aggregate these artifacts instead of forcing children into a binary success/fail shape.
+
+### 4. Fingerprint failure modes, then switch strategy
+Repeated retries are only rational if the failure mode is transient. The orchestrator should classify failures into buckets such as:
+- auth missing/expired,
+- browser relay unattached,
+- selector / UI drift,
+- external system ambiguity,
+- runtime/tool unavailable,
+- mutation accepted but verification unavailable,
+- approval required but not granted.
+
+Each class needs a defined next action: retry, reroute, decompose prerequisite, request approval, or stop.
+
+### 5. Approval-gated lane for irreversible actions
+Irreversible or safety-sensitive actions should require a specific lane with:
+- explicit action summary,
+- preconditions satisfied,
+- target/entity verified,
+- rollback possibilities documented,
+- approval token or human confirmation captured,
+- post-action verification defined.
+
+This should not share the same semantics as low-risk research tasks.
+
+## Proposed decomposition
+
+### Child 1 — Mission capability registry and readiness model
+**Goal:** define machine-readable representation of capabilities, prerequisites, and readiness scoring.  
+**Output:** schema + readiness levels + examples + integration points.
+
+### Child 2 — Execution preflight and prerequisite detection
+**Goal:** build the gate that runs before decomposition/execution to detect missing auth, tools, browser state, permissions, and verification channels.  
+**Output:** preflight rules, fail-fast decisions, prerequisite task generation rules.
+
+### Child 3 — Grounding and uncertain-entity evaluation framework
+**Goal:** handle missions where the target entity, account, page, or record is uncertain.  
+**Output:** match confidence model, evidence requirements, safe stopping conditions.
+
+### Child 4 — Failure fingerprinting and strategy switching
+**Goal:** stop naive retry loops and route repeated failures to the right next strategy.  
+**Output:** failure taxonomy, retry budgets, switching rules, observability requirements.  
+**Design artifact:** `docs/ops/t1712-failure-fingerprinting-strategy-switching.md`
+
+### Child 5 — Approval-gated irreversible-action lane
+**Goal:** create a separate workflow for steps with material consequences.  
+**Output:** approval packet schema, gate conditions, execution/verification semantics.
+
+### Child 6 — Aggregate policy and artifact-first closure semantics
+**Goal:** preserve partial progress through review and parent aggregation.  
+**Output:** child completion semantics, artifact contract, parent merge rules, review checklist.
+
+### Child 7 — Cross-repo implementation and validation plan
+**Goal:** map where changes belong across taskcore and colony, sequence rollout, and define tests.  
+**Output:** implementation order, repo ownership, migration plan, acceptance tests.
+
+## Ordering recommendation
+
+Recommended execution order:
+1. Capability registry and readiness model
+2. Execution preflight and prerequisite detection
+3. Grounding / uncertain-entity evaluation
+4. Failure fingerprinting and strategy switching
+5. Approval-gated lane for irreversible actions
+6. Aggregate policy and artifact-first closure semantics
+7. Cross-repo implementation and validation plan
+
+Rationale:
+- readiness and preflight are foundation layers,
+- grounding determines whether execution should even begin,
+- failure handling is only useful after readiness semantics exist,
+- approval gating depends on stable preconditions and verification semantics,
+- aggregate closure should be shaped after artifact types are defined,
+- implementation planning should be last so it reflects the final architecture.
+
+## Acceptance criteria for the parent task
+
+T1710 should only be considered complete when it produces:
+- a parent architecture memo with the final system model,
+- child tasks covering all six functional areas plus rollout/validation,
+- explicit artifact contracts between children,
+- a recommended order of implementation,
+- concrete acceptance tests for the integrated workflow.
+
+## What not to do
+
+- Do **not** patch a single historical workflow.
+- Do **not** encode browser-specific hacks as general orchestration policy.
+- Do **not** collapse discovery, execution, and approval into one task type.
+- Do **not** use success/failure alone as the parent aggregation model.
+- Do **not** allow irreversible execution without explicit preconditions and approval semantics.
+
+## Immediate next move
+
+Decompose T1710 into the ordered child tasks above, using domain-agnostic language and artifact-focused outputs. The parent remains responsible for the integrated architecture and rollout sequence.
diff --git a/docs/ops/t1712-failure-fingerprinting-strategy-switching.md b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md
new file mode 100644
index 0000000..576c279
--- /dev/null
+++ b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md
@@ -0,0 +1,449 @@
+# T1712 — Failure fingerprinting, strategy switching, and dynamic retry budgets
+
+## Problem statement
+
+TaskCore currently treats most agent failures as variations of the same event:
+- `task-executor.mjs` increments a single `retryCount`
+- work is re-queued with generic exponential backoff
+- only rate limits get a distinct path
+- once `MAX_RETRIES` is exhausted, the task is simply blocked
+
+That is too lossy for uncertain, browser-mediated, or dependency-heavy work. The system cannot distinguish:
+- a stale login that should wake a shared auth blocker
+- anti-bot / access denial that should halt sibling work on the same target
+- missing inputs that should create a prerequisite task instead of more retries
+- invalid state transitions that require replanning, not repetition
+- verification failures after a mutation, where autonomy should stop and escalate
+- repeated cost / provider exhaustion, where dispatch should shift strategy globally
+
+The result is sibling churn: multiple leaves burn retries on the same blocker even when the next rational move is shared blocker-removal or mission replanning.
+
+---
+
+## Design goals
+
+1. **Canonical failure identity** — repeated failures with the same operative cause produce the same fingerprint.
+2. **Scope-aware routing** — leaf-local failures stay local; shared blockers fan in to one prerequisite task; global exhaustion triggers broader throttling.
+3. **Strategy switching over blind retries** — retry only when the failure class is plausibly transient.
+4. **Task-kind-aware budgets** — execution, aggregate, and artifact-only tasks do not share the same retry policy.
+5. **Inspectable decisions** — every retry, pause, reroute, and blocker promotion is visible in task metadata and executor outcome logs.
+6. **Fail-closed near irreversible work** — verification and state-transition failures on execution tasks escalate before more automation is attempted.
+
+### Non-goals
+
+- Replacing provider allocation gating from T755.
+- Replacing the recovery breaker engine for host/service remediation.
+- Solving target grounding/entity ambiguity (that belongs to T1704/T2110-style grounding work).
+
+---
+
+## Where this policy plugs in
+
+Primary integration points:
+1. **`scripts/task-executor.mjs`** — authoritative classification, retry budgeting, and routing.
+2. **task metadata in `.taskmaster/tasks/tasks.json`** — persistent fingerprint, counters, blocker linkage, and route decisions.
+3. **executor outcome log** (`data/task-dashboard/executor_outcomes.jsonl`) — append fingerprint + routing evidence.
+4. **dashboard export / APIs** — surface repeated failure clusters, promoted blockers, and retry-budget exhaustion.
+
+This task defines the contract and routing model. A later execution task should implement the plumbing.
+
+---
+
+## 1) Canonical failure fingerprint model
+
+A **failure fingerprint** is the deduplicated identity of a task failure for orchestration purposes.
+
+### Proposed schema
+
+```json
+{
+  "version": "v1",
+  "fingerprintId": "ffp_01HV...",
+  "taskId": 1712,
+  "taskKind": "execution",
+  "runPhase": "work",
+  "failureClass": "auth_session_failure",
+  "scope": "shared_prerequisite",
+  "resourceKey": "telegram:account:primary",
+  "surface": "browser_relay",
+  "reasonCode": "session_expired",
+  "signature": {
+    "provider": "openai-codex",
+    "model": "gpt-5.4",
+    "exitType": "agent_crash",
+    "stderrClass": "login_required",
+    "targetRef": "telegram-web"
+  },
+  "dedupeKey": "auth_session_failure|telegram:account:primary|browser_relay|session_expired",
+  "firstSeenAt": "2026-03-13T09:00:00Z",
+  "lastSeenAt": "2026-03-13T09:07:00Z",
+  "attempts": 3,
+  "affectedTaskIds": [1712, 1718, 1721],
+  "recommendedStrategy": "wake_or_create_blocker",
+  "recommendedBlockerKey": "blocker:auth:telegram:account:primary"
+}
+```
+
+### Required fields
+
+| Field | Meaning |
+|---|---|
+| `taskKind` | `execution`, `aggregate`, `artifact_only`, `review`, `capability_probe`, etc. |
+| `runPhase` | `work` or `review` |
+| `failureClass` | canonical orchestrator-facing class |
+| `scope` | `leaf_local`, `shared_prerequisite`, or `global_budget` |
+| `resourceKey` | normalized target of the blocker (`browser:relay:chrome`, `human:kas`, `provider:openai-codex`) |
+| `reasonCode` | finer-grained sub-cause |
+| `dedupeKey` | stable routing key used to collapse repeats |
+| `attempts` | count of repeated hits within policy window |
+| `recommendedStrategy` | `retry_same_path`, `switch_strategy`, `wake_or_create_blocker`, `pause_for_review`, `global_throttle` |
+
+### Scope semantics
+
+- **`leaf_local`** — retry/replan only this task. Example: malformed prompt for one artifact-only task.
+- **`shared_prerequisite`** — stop retrying sibling leaves and promote a shared blocker. Example: expired auth, missing credential, inaccessible website.
+- **`global_budget`** — provider/cost saturation or broad platform outage; gate future dispatch and avoid local churn.
+
+---
+
+## 2) Canonical failure classes
+
+These are the minimum classes required for T1712 acceptance.
+
+| Failure class | Typical signals | Default scope | Default strategy |
+|---|---|---|---|
+| `auth_session_failure` | login required, expired cookie/session, wallet disconnected, missing permission grant | `shared_prerequisite` | pause affected leaves, wake/create auth blocker |
+| `access_denial_antibot` | captcha, 403, antibot page, WAF deny, account challenge | `shared_prerequisite` | stop automation path, request human unblock / alternate channel |
+| `missing_input` | required ID/file/approval/parameter absent | `shared_prerequisite` if shared, else `leaf_local` | create prerequisite task or request user input |
+| `invalid_state_transition` | task tries action from wrong state, precondition invalid, already-submitted/closed/cancelled | `leaf_local` or `shared_prerequisite` if state is mission-wide | replan from refreshed state; no same-path retry |
+| `verification_failure` | mutation possibly happened but postcondition cannot be proven; conflicting checks | `leaf_local` on single leaf, fail-closed for execution | halt autonomy, require verification/review path |
+| `cost_exhaustion_repeated` | provider denied for budget/quota reasons across attempts/windows | `global_budget` | throttle dispatch, shift provider/model/priority policy |
+
+Recommended additional classes for implementation completeness:
+- `tool_runtime_unavailable`
+- `ui_selector_drift`
+- `rate_limit_transient`
+- `dependency_blocked`
+- `human_approval_missing`
+
+### Failure-class notes
+
+#### `auth_session_failure`
+Examples:
+- browser relay attached but session logged out
+- API token missing or expired
+- wallet connector present but no connected account
+
+Rule: after the second matching hit within the policy window, stop local retries and create/wake one shared auth blocker keyed by the affected account/resource.
+
+#### `access_denial_antibot`
+Rule: never let sibling leaves keep probing the same blocked surface. Switch to a human-assisted or alternate-channel strategy immediately after first confirmed match.
+
+#### `missing_input`
+Rule: if the missing input is shared by multiple children (e.g. target account id, approval token, attachment), collapse it into one prerequisite task and mark dependent leaves as waiting/blocked-by-dependency.
+
+#### `invalid_state_transition`
+Rule: do not spend retry budget repeating an action against a stale assumption. Refresh state, then either replan or close as not-applicable.
+
+#### `verification_failure`
+Rule: for execution tasks, verification failure after a mutation is **not retry-equivalent** to a normal crash. The system must stop and request human review or explicit verification work.
+
+#### `cost_exhaustion_repeated`
+Rule: once the same quota/budget fingerprint repeats across tasks or time windows, it becomes a dispatch-policy problem, not a leaf problem. Route to global throttling or provider switch.
+
+---
+
+## 3) Fingerprint derivation rules
+
+The executor should derive a fingerprint in four passes:
+
+1. **Normalize runtime facts**
+   - task kind
+   - run phase
+   - assignee/reviewer
+   - exit code / signal
+   - error tail classification
+   - known provider/model metadata
+   - target resource / surface
+
+2. **Assign failure class**
+   - use deterministic rule table before any model-based classifier
+   - allow only a bounded fallback classifier for unknown cases
+
+3. **Resolve routing scope**
+   - infer whether the cause is leaf-local, shared prerequisite, or global budget
+
+4. **Build dedupe key**
+   - `failureClass | resourceKey | surface | reasonCode`
+   - exclude volatile text (timestamps, raw stack traces, run ids)
+
+### Example derivations
+
+```text
+stderr: "Telegram Web shows login required"
+=> failureClass=auth_session_failure
+=> resourceKey=telegram:web:primary
+=> scope=shared_prerequisite
+=> dedupeKey=auth_session_failure|telegram:web:primary|browser_relay|login_required
+```
+
+```text
+stderr: "429 Too Many Requests from provider openai-codex"
+=> failureClass=rate_limit_transient
+=> resourceKey=provider:openai-codex
+=> scope=leaf_local (single task) OR global_budget once repeated threshold trips
+=> dedupeKey=rate_limit_transient|provider:openai-codex|dispatch|429
+```
+
+```text
+stderr: "proposal already published"
+=> failureClass=invalid_state_transition
+=> resourceKey=proposal:1234
+=> scope=shared_prerequisite if many leaves assume draft state
+=> dedupeKey=invalid_state_transition|proposal:1234|mutation_path|already_published
+```
+
+---
+
+## 4) Strategy-switching rules
+
+### Canonical strategies
+
+| Strategy | Use when | Result |
+|---|---|---|
+| `retry_same_path` | transient/local issue and retry budget remains | requeue same task with backoff |
+| `switch_strategy` | same objective still valid but current path is irrational | reroute to alternate tool/channel/plan |
+| `wake_or_create_blocker` | repeated shared blocker across leaves | create or wake one prerequisite task and pause dependents |
+| `pause_for_review` | verification or safety-sensitive ambiguity | send to review / human confirmation |
+| `global_throttle` | provider or budget exhaustion spans multiple tasks | deny/defer future dispatch until healthy |
+
+### Routing decision table
+
+| Failure class | First hit | Repeated hit | Exhausted state |
+|---|---|---|---|
+| `auth_session_failure` | retry once if evidence is weak; otherwise create blocker immediately | wake/create shared auth blocker; pause sibling leaves | mark dependency blocker and stop automation until prerequisite closes |
+| `access_denial_antibot` | stop same-path retries; request alternate route | shared blocker + human review | quarantine target surface for cooldown window |
+| `missing_input` | create/wake prerequisite or ask for input | collapse siblings onto same blocker | leave waiting on prerequisite, no further retries |
+| `invalid_state_transition` | refresh state and re-evaluate | replan or close as superseded | no further retries on stale path |
+| `verification_failure` | require explicit verification task / review | block execution lane on target | escalate to human with evidence bundle |
+| `cost_exhaustion_repeated` | apply local defer/backoff | trigger provider/model/policy switch or dispatch gate deny | global throttle until healthy window returns |
+
+### Shared blocker promotion rule
+
+When all of the following hold, the executor promotes a blocker task:
+1. `scope == shared_prerequisite`
+2. same `dedupeKey` occurs on **>= 2 tasks** or **>= 2 attempts on one task** within the policy window
+3. a blocker with the same `recommendedBlockerKey` is not already active
+
+Result:
+- create or wake one blocker task
+- attach `metadata.failureFingerprint.blockerTaskId`
+- mark affected leaves as dependency-blocked / waiting on that blocker
+- suppress additional same-fingerprint retries until blocker state changes
+
+### Replanning rule
+
+If a task hits `invalid_state_transition` or `verification_failure`, the next action should be a replan/verification step, not another leaf retry. The executor should either:
+- create a child task for state refresh / verification, or
+- send the task back to review with the fingerprint attached.
+
+---
+
+## 5) Dynamic retry budget policy
+
+Retry budgets must be keyed by **task kind** and **failure class**, not one global `MAX_RETRIES`.
+
+### Policy table (recommended v1)
+
+| Task kind | Failure class | Auto retries | Backoff class | On exhaustion |
+|---|---|---:|---|---|
+| `execution` | `rate_limit_transient` | 2 | long | switch provider or defer via gate |
+| `execution` | `auth_session_failure` | 1 | short | create/wake auth blocker |
+| `execution` | `access_denial_antibot` | 0 | none | human unblock / alternate path |
+| `execution` | `missing_input` | 0 | none | prerequisite task |
+| `execution` | `invalid_state_transition` | 0 | none | refresh + replan |
+| `execution` | `verification_failure` | 0 | none | review / verification task |
+| `execution` | `tool_runtime_unavailable` | 1 | medium | reroute tool / pause |
+| `aggregate` | `dependency_blocked` | 0 | none | wait for required children / blocker |
+| `aggregate` | `missing_input` | 0 | none | request missing artifact coverage |
+| `aggregate` | `tool_runtime_unavailable` | 1 | short | rerun reducer/export path |
+| `aggregate` | `verification_failure` | 1 | short | review aggregate evidence |
+| `artifact_only` | `tool_runtime_unavailable` | 2 | short | reroute agent/tool |
+| `artifact_only` | `missing_input` | 0 | none | request source material |
+| `artifact_only` | `invalid_state_transition` | 0 | none | usually close/supersede, not retry |
+| `artifact_only` | `cost_exhaustion_repeated` | 1 | long | defer until budget recovers |
+| `review` | `tool_runtime_unavailable` | 1 | short | reroute reviewer |
+| `review` | `verification_failure` | 0 | none | escalate to human reviewer |
+
+### Why the policies differ
+
+- **Execution tasks** carry the most risk; most non-transient classes should not auto-retry.
+- **Aggregate tasks** should rarely retry; repeated child blockers are usually dependency issues, not execution failures.
+- **Artifact-only tasks** can tolerate slightly more retry on tooling failures because they do not directly mutate external state.
+
+### Budget accounting model
+
+Track two counters per task attempt window:
+1. **`pathRetryCount`** — retries on the same strategy/path
+2. **`strategySwitchCount`** — number of alternate paths already tried
+
+This prevents a task from escaping budget control by bouncing endlessly between weak alternatives.
+
+Recommended defaults:
+- `execution`: `pathRetryCount <= 2`, `strategySwitchCount <= 1`
+- `aggregate`: `pathRetryCount <= 1`, `strategySwitchCount <= 1`
+- `artifact_only`: `pathRetryCount <= 2`, `strategySwitchCount <= 2`
+
+---
+
+## 6) Proposed metadata contract
+
+Add the following metadata shape to task records:
+
+```json
+{
+  "metadata": {
+    "retryPolicy": {
+      "taskKind": "execution",
+      "pathRetryCount": 1,
+      "strategySwitchCount": 0,
+      "budgetWindow": "30m",
+      "lastBudgetDecision": "wake_or_create_blocker"
+    },
+    "failureFingerprint": {
+      "fingerprintId": "ffp_01HV...",
+      "failureClass": "auth_session_failure",
+      "scope": "shared_prerequisite",
+      "dedupeKey": "auth_session_failure|telegram:web:primary|browser_relay|login_required",
+      "attemptsWindow": 2,
+      "firstSeenAt": "2026-03-13T09:00:00Z",
+      "lastSeenAt": "2026-03-13T09:07:00Z",
+      "recommendedStrategy": "wake_or_create_blocker",
+      "blockerTaskId": 1730
+    },
+    "blockedByFingerprint": true,
+    "sharedBlockerKey": "blocker:auth:telegram:web:primary"
+  }
+}
+```
+
+### Executor outcome log extension
+
+Each `executor_outcomes.jsonl` record should append:
+- `taskKind`
+- `failureClass`
+- `fingerprintDedupeKey`
+- `routingDecision`
+- `scope`
+- `strategySwitched` (bool)
+- `blockerTaskId` (if any)
+
+This is the minimum observability needed to prove sibling churn actually dropped after rollout.
+
+---
+
+## 7) Shared blocker lifecycle
+
+### Blocker task creation contract
+
+When promoting shared blocker work, the system should create a task with:
+- kind: `capability_probe` or `blocker_removal`
+- title: deterministic and resource-based
+- description: include fingerprint class, affected resource, and evidence bundle
+- metadata:
+  - `blockerKey`
+  - `sourceFingerprint`
+  - `affectedTaskIds`
+  - `createdFromFailureRouter=true`
+
+### Wake vs create
+
+- **Wake existing blocker** when an active/pending blocker has the same `blockerKey`.
+- **Create new blocker** only when no active blocker exists.
+
+### Leaf behavior while blocker active
+
+Affected leaves should not continue ordinary retry scheduling. Instead they should move to a dependency-held condition with:
+- blocker task id
+- blocker key
+- fingerprint id
+- timestamp of last routed match
+
+This is the mechanism that stops sibling retry burn.
+
+---
+
+## 8) Observability and dashboards
+
+Required dashboard/reporting surfaces:
+1. **Top repeated fingerprints** in the last 24h
+2. **Shared blocker promotions** and number of leaves collapsed behind each blocker
+3. **Retries avoided** after blocker promotion
+4. **Failure-class breakdown by task kind**
+5. **Global budget throttles** with provider/model linkage
+
+Key success metrics:
+- drop in repeated identical failures per sibling set
+- increase in blocker reuse rate
+- reduction in tasks blocked only after max retries
+- fewer execution tasks auto-retrying on verification failures
+
+---
+
+## 9) Acceptance tests for the implementation task
+
+### A. Shared auth blocker
+1. Two execution leaves fail with the same expired-session fingerprint.
+2. System creates/wakes one auth blocker.
+3. Sibling leaves stop consuming retries.
+4. Both leaves reference the same blocker task id/key.
+
+### B. Anti-bot/access denial
+1. First confirmed anti-bot fingerprint occurs on an execution task.
+2. Executor does **not** requeue same path.
+3. Alternate route or human unblock task is requested.
+
+### C. Missing shared input
+1. Multiple tasks lack the same approval token / file.
+2. One prerequisite task is created.
+3. Additional failures attach to existing blocker instead of creating duplicates.
+
+### D. Invalid state transition
+1. Execution task tries to mutate an already-finalized resource.
+2. No same-path retry occurs.
+3. Task routes to refresh/replan or closes as superseded.
+
+### E. Verification failure
+1. Mutation step appears to succeed but verification check is inconclusive.
+2. Task does not spend standard retry budget.
+3. Task escalates to review/verification work.
+
+### F. Dynamic budgets by task kind
+1. `artifact_only` task with tool outage gets >0 retries.
+2. `aggregate` task with dependency blocker gets 0 same-path retries.
+3. `execution` task with auth failure gets <=1 retry before blocker promotion.
+
+### G. Repeated cost exhaustion
+1. Same provider quota exhaustion hits multiple tasks in window.
+2. A global throttle / provider-switch path is activated.
+3. Future dispatch is deferred rather than burning leaf retries.
+
+---
+
+## 10) Implementation order recommendation
+
+1. Add fingerprint schema + deterministic classifier in executor.
+2. Persist metadata and outcome-log extensions.
+3. Add blocker promotion / wake semantics.
+4. Add per-task-kind retry policy table.
+5. Add dashboard aggregation + regression tests.
+
+This order ensures the system can first *see* repeated failure identity, then *route* it, then *enforce* differentiated budgets.
+
+---
+
+## Bottom line
+
+T1712 should change TaskCore from **"every failure increments one retry counter"** to **"each failure class carries a scoped fingerprint and a bounded next strategy."**
+
+That is the key behavior change needed to stop repeated sibling churn, promote blocker-removal work when appropriate, and make retry policy depend on both **what failed** and **what kind of task failed**.
diff --git a/docs/task-cli-spec.md b/docs/task-cli-spec.md
index 661d03a..af2dfb8 100644
--- a/docs/task-cli-spec.md
+++ b/docs/task-cli-spec.md
@@ -510,6 +510,105 @@ Abort a pending decomposition session without committing.
 task decompose cancel
 ```
 
+#### `task plan` / `task decompose plan` — one-shot plan-based decomposition
+
+Submit a complete decomposition in one command from a structured plan text.
+Uses the same `POST /tasks/:id/decompose` endpoint as a regular commit, so
+children appear in the dashboard immediately.
+
+`task plan` is the low-friction shortcut for agents; `task decompose plan` is
+its explicit decomposition-scoped equivalent.
+
+```
+task plan "<markdown>"
+task plan --file <path-to-plan.md>
+task plan --stdin
+    [--strategy sequential|parallel]    (default: sequential)
+
+# Equivalent forms
+task decompose plan --items "<markdown>"
+task decompose plan --file <path-to-plan.md>
+task decompose plan --stdin
+    [--strategy sequential|parallel]    (default: sequential)
+```
+
+**Plan format:**
+
+```markdown
+# Optional Section Heading
+- [ ] Checklist step title (cost: 10, assignee: coder)
+- [x] Already-checked step (cost: 5)
+1. Ordered list step
+- Plain bullet step
+  Indented lines (2+ spaces) become the child description.
+  Multiple continuation lines are joined with newlines.
+```
+
+Supported item types: checklist (`- [ ]`), ordered (`1.`), plain bullet (`-`).
+
+Headings (`#`, `##`, etc.) set a context prefix applied to all following items
+until the next heading: `"Section: Step title"`.
+
+**Trailing metadata** (optional, at end of item line):
+
+```
+(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true)
+```
+
+Supported keys: `cost`, `assignee`, `reviewer`, `skip-analysis`.
+
+**Plan editing / propagation semantics (v1):**
+
+- The imported list order becomes the child task order. With the default
+  `sequential` strategy, that order also drives which child is ready first.
+- Checkbox state in the source markdown is **not** treated as task completion.
+  Checked and unchecked items both import as child tasks; completion is tracked
+  by taskcore child state after materialization.
+- Before materialization, edit the markdown and re-run `task plan`.
+- After materialization, the child tasks become the durable source of truth
+  shown in taskcore/dashboard. If the remaining plan needs to change, create a
+  new decomposition version for the remaining work rather than mutating
+  completed child history in place.
+
+**Cost allocation:**
+
+- Items with explicit `cost` keep that value.
+- Items without `cost` share the remaining budget evenly (integer-cent
+  arithmetic — no float drift).
+- Errors clearly if explicit costs exceed the parent's remaining budget, or
+  if there is no budget left for uncosted items.
+
+**Example:**
+
+```markdown
+# Infrastructure
+- [ ] Provision database (cost: 15, assignee: infra)
+- [ ] Configure networking (cost: 10, assignee: infra)
+
+# Application
+- [ ] Implement API endpoints (assignee: coder, reviewer: lead)
+  Build REST endpoints for user CRUD operations.
+- [ ] Write integration tests
+```
+
+```
+task plan --file plan.md --strategy sequential
+```
+
+**Output:**
+
+```
+--- Plan materialized for T42 ---
+
+Created 4 child tasks:
+  T101: Infrastructure: Provision database   15.00
+  T102: Infrastructure: Configure networking  10.00
+  T103: Application: Implement API endpoints  12.50
+  T104: Application: Write integration tests  12.50
+
+Parent task is now a coordinator. Children will execute the plan.
+```
+
 ### 3.6 Review Workflow (guided, multi-step)
 
 When a task is in the `review` phase and claimed by a reviewer.
diff --git a/middle/http.ts b/middle/http.ts
index 2278a5f..6c471a3 100644
--- a/middle/http.ts
+++ b/middle/http.ts
@@ -712,11 +712,15 @@ function handleClaimTask(
       return { status: 409, body: { error: "terminal_task", message: `Task ${taskId} is terminal` } };
     }
 
-    const agentId = typeof b.agentId === "string" && b.agentId.trim().length > 0
-      ? b.agentId.trim()
-      : typeof b.agent === "string" && b.agent.trim().length > 0
-      ? b.agent.trim()
-      : "unknown";
+    const requestedAgentId = typeof b.agentId === "string"
+      ? b.agentId
+      : typeof b.agent === "string"
+      ? b.agent
+      : null;
+    if (requestedAgentId !== null && requestedAgentId.trim().length === 0) {
+      return { status: 400, body: { error: "invalid_agent_id", message: "agentId must be non-empty." } };
+    }
+    const agentId = requestedAgentId?.trim() || "unknown";
 
     const fenceToken = task.currentFenceToken + 1;
     const sessionId = crypto.randomUUID();
@@ -971,8 +975,11 @@ function handleListTasks(
       assignee: t.metadata["assignee"] ?? null,
       reviewer: t.metadata["reviewer"] ?? null,
       priority: t.metadata["priority"] ?? "medium",
+      activeAgent: t.leasedTo,
+      leaseExpiresAt: t.leaseExpiresAt,
       createdAt: t.createdAt,
       updatedAt: t.updatedAt,
+      updatedAtMs: t.updatedAt,
     }));
 
     return { status: 200, body: { tasks: summaries } };
@@ -2900,8 +2907,11 @@ function collectAttentionTasks(core: Core): {
       (t) =>
         !t.terminal &&
         t.condition === "active" &&
-        t.leaseExpiresAt !== null &&
-        t.leaseExpiresAt <= now,
+        (
+          t.leaseExpiresAt === null ||
+          (typeof t.leasedTo !== "string" || t.leasedTo.trim().length === 0) ||
+          t.leaseExpiresAt <= now
+        ),
     )
     .map(toSummary);
 
diff --git a/middle/journal.ts b/middle/journal.ts
index bb77768..d840023 100644
--- a/middle/journal.ts
+++ b/middle/journal.ts
@@ -255,6 +255,7 @@ function git(cwd: string, args: string[]): string {
     cwd,
     encoding: "utf-8",
     timeout: 10_000,
+    stdio: ["ignore", "pipe", "pipe"],
     env: {
       ...process.env,
       GIT_TERMINAL_PROMPT: "0",
diff --git a/middle/prompt.ts b/middle/prompt.ts
index 99790ab..32e0c95 100644
--- a/middle/prompt.ts
+++ b/middle/prompt.ts
@@ -412,7 +412,8 @@ function appendDecompositionInstructions(sections: string[], task: Task, config:
   sections.push("");
   sections.push("## How to Submit Your Decomposition");
   sections.push("");
-  sections.push("Use the incremental decompose CLI — it guides you step by step:");
+  sections.push("Preferred when you already have a checklist: use the one-shot plan flow (`task plan --file plan.md` or `task decompose plan --stdin`).");
+  sections.push("If you need to build children incrementally, use the step-by-step decompose flow below:");
   sections.push("");
   sections.push("```bash");
   sections.push(`# Step 1: Start a decomposition session`);
diff --git a/middle/test/http.test.ts b/middle/test/http.test.ts
index f73e336..af87f5b 100644
--- a/middle/test/http.test.ts
+++ b/middle/test/http.test.ts
@@ -1,3 +1,4 @@
+import { execFileSync } from "node:child_process";
 import * as http from "node:http";
 import { test, describe, beforeEach, afterEach } from "node:test";
 import * as assert from "node:assert/strict";
@@ -110,6 +111,16 @@ async function teardown(): Promise<void> {
   }
 }
 
+function initRepo(repoPath: string): void {
+  fs.mkdirSync(repoPath, { recursive: true });
+  execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" });
+  execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" });
+  execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" });
+  fs.writeFileSync(path.join(repoPath, "README.md"), "# test\n", "utf-8");
+  execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" });
+  execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" });
+}
+
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
@@ -209,6 +220,35 @@ describe("HTTP API", () => {
     assert.equal(body.tasks.length, 2);
   });
 
+  test("GET /tasks summaries include active agent and lease metadata", async () => {
+    await request("POST", "/tasks", {
+      title: "Leased task",
+      description: "Should expose lease info in summary",
+      assignee: "coder",
+    });
+    const claimRes = await request("POST", "/tasks/1/claim", {
+      agentId: "coder",
+      source: "test",
+    });
+    assert.equal(claimRes.status, 200);
+
+    const res = await request("GET", "/tasks");
+    assert.equal(res.status, 200);
+    const body = res.body as {
+      tasks: Array<{
+        id: string;
+        activeAgent: string | null;
+        leaseExpiresAt: number | null;
+        updatedAtMs: number | null;
+      }>;
+    };
+    const task = body.tasks.find((entry) => entry.id === "1");
+    assert.ok(task);
+    assert.equal(task.activeAgent, "coder");
+    assert.equal(typeof task.leaseExpiresAt, "number");
+    assert.equal(task.updatedAtMs !== null && task.updatedAtMs > 0, true);
+  });
+
   test("GET /dispatchable lists dispatchable tasks", async () => {
     await request("POST", "/tasks", {
       title: "Ready task",
@@ -232,6 +272,21 @@ describe("HTTP API", () => {
     assert.equal(res.status, 404);
   });
 
+  test("POST /tasks/:id/claim rejects blank agent id", async () => {
+    await request("POST", "/tasks", {
+      title: "Claim validation",
+      description: "Blank claim agent should fail",
+    });
+
+    const res = await request("POST", "/tasks/1/claim", {
+      agentId: "   ",
+      source: "test",
+    });
+    assert.equal(res.status, 400);
+    const body = res.body as { error: string };
+    assert.equal(body.error, "invalid_agent_id");
+  });
+
   test("PATCH /tasks/:id/metadata updates priority", async () => {
     await request("POST", "/tasks", {
       title: "Metadata test",
@@ -462,6 +517,41 @@ describe("HTTP API", () => {
     assert.ok(readBody.content.includes("Starting implementation"));
   });
 
+  test("claim falls back to repo HEAD when requested base branch is missing", async () => {
+    const repoPath = path.join(tmpDir, "code-repo");
+    initRepo(repoPath);
+
+    await request("POST", "/tasks", {
+      title: "Claim fallback test",
+      description: "Claim should still create a code worktree when base branch is missing",
+      assignee: "coder",
+      repo: repoPath,
+      baseBranch: "missing-base",
+      skipAnalysis: true,
+    });
+
+    const claimRes = await request("POST", "/tasks/1/claim", {
+      agentId: "coder",
+      source: "test",
+    });
+    assert.equal(claimRes.status, 200);
+
+    const claimBody = claimRes.body as {
+      workspace?: { codeWorktree?: string | null };
+      warnings?: string[];
+    };
+    assert.ok(claimBody.workspace?.codeWorktree);
+    assert.deepEqual(claimBody.warnings ?? [], []);
+
+    const codeWorktree = claimBody.workspace?.codeWorktree!;
+    assert.equal(fs.existsSync(codeWorktree), true);
+    assert.equal(
+      execFileSync("git", ["branch", "--show-current"], { cwd: codeWorktree, encoding: "utf-8" }).trim(),
+      "task/T1",
+    );
+    assert.equal(fs.existsSync(path.join(codeWorktree, "README.md")), true);
+  });
+
   test("status done completes execution task directly when no reviewer is configured", async () => {
     await request("POST", "/tasks", {
       title: "Direct completion",
diff --git a/middle/test/worktree.test.ts b/middle/test/worktree.test.ts
new file mode 100644
index 0000000..dfa7b0a
--- /dev/null
+++ b/middle/test/worktree.test.ts
@@ -0,0 +1,63 @@
+import { spawnSync } from "node:child_process";
+import * as assert from "node:assert/strict";
+import * as path from "node:path";
+import { test } from "node:test";
+import { fileURLToPath, pathToFileURL } from "node:url";
+
+const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../..");
+const journalModuleUrl = pathToFileURL(path.join(repoRoot, "middle/journal.ts")).href;
+const worktreeModuleUrl = pathToFileURL(path.join(repoRoot, "middle/worktree.ts")).href;
+
+test("task workspace bootstrap stays silent on stderr for fresh branches", () => {
+  const script = [
+    'import * as fs from "node:fs";',
+    'import * as os from "node:os";',
+    'import * as path from "node:path";',
+    `import { initJournalRepo, createTaskBranch, taskBranch } from ${JSON.stringify(journalModuleUrl)};`,
+    `import { createWorktree } from ${JSON.stringify(worktreeModuleUrl)};`,
+    'const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-worktree-noise-"));',
+    'const repoPath = path.join(tmpDir, "journal");',
+    'initJournalRepo(repoPath);',
+    'createTaskBranch(repoPath, "1");',
+    'createWorktree(repoPath, path.join(tmpDir, "journal-T1"), taskBranch("1"));',
+  ].join("\n");
+
+  const result = spawnSync(process.execPath, ["--import", "tsx", "-e", script], {
+    cwd: repoRoot,
+    encoding: "utf-8",
+  });
+
+  assert.equal(result.status, 0, result.stderr || result.stdout);
+  assert.equal(result.stderr.trim(), "");
+});
+
+test("createWorktree falls back to repo HEAD when start point is missing", () => {
+  const script = [
+    'import { execFileSync } from "node:child_process";',
+    'import * as fs from "node:fs";',
+    'import * as os from "node:os";',
+    'import * as path from "node:path";',
+    `import { createWorktree } from ${JSON.stringify(worktreeModuleUrl)};`,
+    'const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-worktree-fallback-"));',
+    'const repoPath = path.join(tmpDir, "repo");',
+    'fs.mkdirSync(repoPath, { recursive: true });',
+    'execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" });',
+    'execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" });',
+    'execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" });',
+    'fs.writeFileSync(path.join(repoPath, "README.md"), "# test\\n", "utf-8");',
+    'execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" });',
+    'execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" });',
+    'const worktreePath = path.join(tmpDir, "code-T1");',
+    'createWorktree(repoPath, worktreePath, "task/T1", "missing-base");',
+    'const branch = execFileSync("git", ["branch", "--show-current"], { cwd: worktreePath, encoding: "utf-8" }).trim();',
+    'if (branch !== "task/T1") throw new Error(`unexpected branch ${branch}`);',
+    'if (!fs.existsSync(path.join(worktreePath, "README.md"))) throw new Error("README.md missing from fallback worktree");',
+  ].join("\n");
+
+  const result = spawnSync(process.execPath, ["--import", "tsx", "-e", script], {
+    cwd: repoRoot,
+    encoding: "utf-8",
+  });
+
+  assert.equal(result.status, 0, result.stderr || result.stdout);
+});
diff --git a/middle/worktree.ts b/middle/worktree.ts
index 2a4d959..a1f11af 100644
--- a/middle/worktree.ts
+++ b/middle/worktree.ts
@@ -23,13 +23,20 @@ export function createWorktree(
   // the worktree doesn't have the keys yet.
   const useGitCrypt = fs.existsSync(path.join(repoPath, ".git", "git-crypt"));
   const noCheckout = useGitCrypt ? "--no-checkout" : null;
+  const resolvedStartPoint = startPoint && refExists(repoPath, startPoint)
+    ? startPoint
+    : undefined;
+  const createBranch = !branchExists(repoPath, branch);
 
   const addArgs = (withNewBranch: boolean): string[] => {
     const args = ["worktree", "add"];
     if (noCheckout) args.push(noCheckout);
+    if (withNewBranch) {
+      args.push("-b", branch);
+    }
     args.push(worktreePath);
-    if (withNewBranch && startPoint) {
-      args.push("-b", branch, startPoint);
+    if (withNewBranch) {
+      if (resolvedStartPoint) args.push(resolvedStartPoint);
     } else {
       args.push(branch);
     }
@@ -38,11 +45,11 @@ export function createWorktree(
 
   const tryAdd = (): void => {
     try {
-      git(repoPath, addArgs(!!startPoint));
+      git(repoPath, addArgs(createBranch));
     } catch (err) {
       const msg = String(err);
       // Branch may already exist — retry without -b
-      if (startPoint && msg.includes("already exists")) {
+      if (createBranch && msg.includes("already exists")) {
         git(repoPath, addArgs(false));
       } else {
         throw err;
@@ -205,12 +212,35 @@ export function cleanupStaleWorktrees(
 // Helpers
 // ---------------------------------------------------------------------------
 
+function branchExists(repoPath: string, branch: string): boolean {
+  return refExists(repoPath, `refs/heads/${branch}`);
+}
+
+function refExists(repoPath: string, ref: string): boolean {
+  try {
+    execFileSync("git", ["rev-parse", "--verify", "--quiet", `${ref}^{commit}`], {
+      cwd: repoPath,
+      encoding: "utf-8",
+      timeout: 30_000,
+      stdio: ["ignore", "pipe", "pipe"],
+      env: {
+        ...process.env,
+        GIT_TERMINAL_PROMPT: "0",
+      },
+    });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
 /** Run a git command. */
 function git(cwd: string, args: string[]): string {
   return execFileSync("git", args, {
     cwd,
     encoding: "utf-8",
     timeout: 30_000,
+    stdio: ["ignore", "pipe", "pipe"],
     env: {
       ...process.env,
       GIT_TERMINAL_PROMPT: "0",
diff --git a/scripts/recover-leaked-leases.ts b/scripts/recover-leaked-leases.ts
new file mode 100644
index 0000000..781f918
--- /dev/null
+++ b/scripts/recover-leaked-leases.ts
@@ -0,0 +1,94 @@
+type Phase = "analysis" | "decomposition" | "execution" | "review";
+
+type AttemptBudget = { used: number; max: number };
+
+type TaskRecord = {
+  id: string;
+  title: string;
+  phase: Phase | null;
+  condition: string | null;
+  terminal: string | null;
+  leasedTo: string | null;
+  leaseExpiresAt: number | null;
+  currentFenceToken: number;
+  attempts: Record<Phase, AttemptBudget>;
+};
+
+type TasksResponse = { tasks: TaskRecord[] };
+
+const apiBase = process.env["TASKCORE_BASE_URL"] ?? "http://127.0.0.1:18800";
+const dryRun = process.argv.includes("--dry-run");
+const retryDelayMs = 1_000;
+
+function nonEmptyText(value: string | null | undefined): value is string {
+  return typeof value === "string" && value.trim().length > 0;
+}
+
+function leakedLeaseReason(task: TaskRecord, now: number): "lease_expired" | "orphaned_on_restart" {
+  if (task.leaseExpiresAt !== null && task.leaseExpiresAt <= now) {
+    return "lease_expired";
+  }
+  return "orphaned_on_restart";
+}
+
+function attemptNumber(task: TaskRecord, phase: Phase): number {
+  return Math.max(1, task.attempts[phase]?.used ?? 0);
+}
+
+async function main(): Promise<void> {
+  const now = Date.now();
+  const response = await fetch(`${apiBase}/tasks?full=true`);
+  if (!response.ok) {
+    throw new Error(`Failed to fetch tasks: ${response.status} ${response.statusText}`);
+  }
+
+  const body = await response.json() as TasksResponse;
+  const leaked = body.tasks.filter((task) =>
+    task.terminal === null &&
+    task.phase !== null &&
+    task.condition === "active" &&
+    (!nonEmptyText(task.leasedTo) || task.leaseExpiresAt === null || task.leaseExpiresAt <= now)
+  );
+
+  if (leaked.length === 0) {
+    process.stdout.write("No leaked active leases found.\n");
+    return;
+  }
+
+  for (const task of leaked) {
+    const reason = leakedLeaseReason(task, now);
+    const payload = {
+      type: "RetryScheduled",
+      taskId: task.id,
+      ts: Date.now(),
+      fenceToken: task.currentFenceToken,
+      reason,
+      retryAfter: Date.now() + retryDelayMs,
+      phase: task.phase,
+      attemptNumber: attemptNumber(task, task.phase),
+    };
+
+    if (dryRun) {
+      process.stdout.write(`[dry-run] would recover T${task.id} (${task.title}) with ${reason}\n`);
+      continue;
+    }
+
+    const recoverResponse = await fetch(`${apiBase}/tasks/${task.id}/events`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (!recoverResponse.ok) {
+      const errorBody = await recoverResponse.text();
+      throw new Error(`Failed to recover T${task.id}: ${recoverResponse.status} ${errorBody}`);
+    }
+
+    process.stdout.write(`Recovered T${task.id} (${task.title}) with ${reason}\n`);
+  }
+}
+
+void main().catch((error) => {
+  const message = error instanceof Error ? error.message : String(error);
+  process.stderr.write(`${message}\n`);
+  process.exitCode = 1;
+});