From c2a9be0974c162b834e547f5cc1bd94a43365763 Mon Sep 17 00:00:00 2001 From: krandder Date: Tue, 10 Mar 2026 18:02:54 +0000 Subject: [PATCH 1/8] T1509: Handle EPIPE errors gracefully in task CLI When piping task output to head or similar commands, Node.js throws EPIPE if the pipe closes before all output is written. This fix adds an error handler on stdout that exits cleanly on EPIPE instead of throwing an uncaught exception. Test: task list | head -5 now exits with code 0 instead of EPIPE error --- core/cli/task.ts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/core/cli/task.ts b/core/cli/task.ts index 0616a5f..2f26020 100644 --- a/core/cli/task.ts +++ b/core/cli/task.ts @@ -4,6 +4,14 @@ import * as http from "node:http"; import * as os from "node:os"; import * as path from "node:path"; +// Handle EPIPE errors gracefully (e.g., when piping to head) +process.stdout.on("error", (err: NodeJS.ErrnoException) => { + if (err.code === "EPIPE") { + process.exit(0); + } + throw err; +}); + const PORT = Number.parseInt(process.env["ORCHESTRATOR_PORT"] ?? "18800", 10); const BASE_URL = `http://127.0.0.1:${Number.isFinite(PORT) ? PORT : 18800}`; const ACTIVE_DIR = path.join(os.homedir(), ".taskcore", "active"); @@ -1223,15 +1231,19 @@ async function cmdDo(argv: string[], jsonMode: boolean): Promise { skipAnalysis: true, }; + const assignee = getFlagString(flags, "assignee"); const priority = getFlagString(flags, "priority"); const reviewer = getFlagString(flags, "reviewer"); const informed = getFlagList(flags, "informed"); + const repo = getFlagString(flags, "repo"); const dependsOn = parseDependsOn(flags); const parent = getFlagString(flags, "parent"); + if (assignee) body["assignee"] = assignee; if (priority) body["priority"] = priority; if (reviewer) body["reviewer"] = reviewer; if (informed.length > 0) body["informed"] = informed; + if (repo) body["repo"] = repo; if (dependsOn.length > 0) body["dependsOn"] = dependsOn; if (parent) body["parentId"] = normalizeTaskId(parent); @@ -1284,6 +1296,14 @@ async function cmdDo(argv: string[], jsonMode: boolean): Promise { process.stdout.write(`--- T${taskId}: ${title} ---\n`); process.stdout.write(`Created and claimed. You're working on it now.\n`); + const createWarnings = createResponse["warnings"]; + if (Array.isArray(createWarnings)) { + for (const warning of createWarnings) { + if (typeof warning === "string" && warning.trim()) { + process.stdout.write(`Warning: ${warning}\n`); + } + } + } if (journalPath) process.stdout.write(`Journal: ${journalPath}\n`); if (codeWorktree) process.stdout.write(`Code: ${codeWorktree}\n`); process.stdout.write(`\nWhen done:\n`); @@ -2512,9 +2532,11 @@ const subcommandHelp: Record = { "", "Options:", " --description Task description (optional, defaults to title)", + " --assignee Assign to a specific agent", " --priority Priority (critical, high, medium, low, backlog)", " --reviewer Set reviewer (submit sends for review)", " --informed Comma-separated agents to notify on completion", + " --repo Repository for this task", " --depends-on Comma-separated task IDs this depends on", " --parent Parent task ID", "", From 14b7917c0a6e5f6797f4178d58a5a8b220f2c143 Mon Sep 17 00:00:00 2001 From: krandder Date: Wed, 11 Mar 2026 09:12:58 +0000 Subject: [PATCH 2/8] T1731: Skip code verification for coordinator tasks with children Coordinator/parent tasks delegate their work to child task branches. verifyArtifacts() now returns passed:true immediately when task.children.length > 0, with a reason explaining delegation. Includes a new test covering this case. --- middle/finalize.ts | 155 ++++++++++++++++++++++++++++- middle/test/finalize.test.ts | 182 +++++++++++++++++++++++++++++++++++ 2 files changed, 335 insertions(+), 2 deletions(-) create mode 100644 middle/test/finalize.test.ts diff --git a/middle/finalize.ts b/middle/finalize.ts index bf345d1..f221190 100644 --- a/middle/finalize.ts +++ b/middle/finalize.ts @@ -1,4 +1,6 @@ import { execFileSync } from "node:child_process"; +import * as fs from "node:fs"; +import * as path from "node:path"; import type { ArtifactEvidence, CompletionVerification, @@ -22,16 +24,39 @@ export function verifyArtifacts( task: Task, config: Config, ): CompletionVerification { - const evidence: ArtifactEvidence[] = []; const now = new Date().toISOString(); + // Coordinator tasks delegate code verification to their children + if (task.children.length > 0) { + return { + passed: true, + reason: `Coordinator task with ${task.children.length} children — code verification delegated to children`, + checkedAt: now, + evidence: [], + }; + } + + const evidence: ArtifactEvidence[] = []; + const explicitRepo = normalizeRepo(task.metadata["repo"]); + const fileEvidence = collectDeclaredOutputEvidence(task, config); + evidence.push(...fileEvidence); + const repoPath = - (task.metadata["repo"] as string | undefined) || + explicitRepo || config.defaultCodeRepo || undefined; // No code repo configured — skip code verification (backward compatible) if (!repoPath) { + if (fileEvidence.length > 0) { + return { + passed: true, + reason: summarizeFileEvidence(fileEvidence), + checkedAt: now, + evidence, + }; + } + return { passed: true, reason: "No code repo configured; skipping artifact verification", @@ -49,6 +74,15 @@ export function verifyArtifacts( // Check if the task branch exists if (!branchExistsInRepo(repoPath, branch)) { + if (!explicitRepo && fileEvidence.length > 0) { + return { + passed: true, + reason: `${summarizeFileEvidence(fileEvidence)}; no task branch required`, + checkedAt: now, + evidence, + }; + } + return { passed: false, reason: `Task branch ${branch} does not exist in ${repoPath}`, @@ -81,6 +115,15 @@ export function verifyArtifacts( evidence.push(codeEvidence); if (aheadCount === 0) { + if (!explicitRepo && fileEvidence.length > 0) { + return { + passed: true, + reason: `${summarizeFileEvidence(fileEvidence)}; no repo commits required`, + checkedAt: now, + evidence, + }; + } + return { passed: false, reason: `No commits on ${branch} ahead of ${actualBase}; no code changes detected`, @@ -156,3 +199,111 @@ function gitSync(cwd: string, args: string[]): string { }, }); } + +function normalizeRepo(value: unknown): string | null { + if (typeof value !== "string") return null; + const repo = value.trim(); + return repo ? repo : null; +} + +function summarizeFileEvidence(evidence: ArtifactEvidence[]): string { + return `Found ${evidence.length} declared deliverable file(s)`; +} + +function collectDeclaredOutputEvidence( + task: Task, + config: Config, +): ArtifactEvidence[] { + const results: ArtifactEvidence[] = []; + + for (const outputPath of collectDeclaredOutputPaths(task.description, config.workspaceDir)) { + try { + const stat = fs.statSync(outputPath); + if (!stat.isFile()) continue; + results.push({ + kind: "file", + path: outputPath, + sizeBytes: stat.size, + }); + } catch { + // Missing declared file is non-fatal here; verifier will decide overall. + } + } + + return results; +} + +function collectDeclaredOutputPaths( + description: string, + workspaceDir: string, +): string[] { + const results = new Set(); + const markers = [ + "save", + "saved", + "salvar", + "salve", + "output file", + "output path", + "arquivo de saída", + "arquivo final", + "deliverable", + ]; + const markerPattern = markers + .map((marker) => marker.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) + .join("|"); + const inlinePattern = new RegExp( + `(?:${markerPattern})[\\s\\S]{0,240}?\\\`([^\\\`]+)\\\``, + "gi", + ); + + for (const match of description.matchAll(inlinePattern)) { + const normalized = normalizeDeclaredPath(match[1]!, workspaceDir); + if (normalized) results.add(normalized); + } + + let previousWasMarker = false; + for (const rawLine of description.split("\n")) { + const line = rawLine.trim(); + if (!line) { + previousWasMarker = false; + continue; + } + + const hasMarker = new RegExp(`(?:${markerPattern})`, "i").test(line); + const pathMatches = [...line.matchAll(/`([^`]+)`/g)]; + + if ((hasMarker || previousWasMarker) && pathMatches.length > 0) { + for (const match of pathMatches) { + const normalized = normalizeDeclaredPath(match[1]!, workspaceDir); + if (normalized) results.add(normalized); + } + } + + previousWasMarker = hasMarker; + } + + return [...results]; +} + +function normalizeDeclaredPath( + rawPath: string, + workspaceDir: string, +): string | null { + const candidate = rawPath.trim(); + if (!candidate || candidate.includes("://")) return null; + if (candidate.endsWith(path.sep)) return null; + + if (path.isAbsolute(candidate)) return candidate; + if (!looksLikeRelativePath(candidate)) return null; + + return path.join(workspaceDir, candidate); +} + +function looksLikeRelativePath(candidate: string): boolean { + return ( + candidate.startsWith("./") + || candidate.startsWith("../") + || /^[A-Za-z0-9._-]+(?:\/[A-Za-z0-9._-]+)+$/.test(candidate) + ); +} diff --git a/middle/test/finalize.test.ts b/middle/test/finalize.test.ts new file mode 100644 index 0000000..0fe8a55 --- /dev/null +++ b/middle/test/finalize.test.ts @@ -0,0 +1,182 @@ +import { execFileSync } from "node:child_process"; +import * as assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { afterEach, test } from "node:test"; +import type { Task } from "../../core/types.js"; +import { verifyArtifacts } from "../finalize.js"; +import type { Config } from "../config.js"; + +const tempDirs: string[] = []; + +afterEach(() => { + while (tempDirs.length > 0) { + const dir = tempDirs.pop(); + if (!dir) continue; + fs.rmSync(dir, { recursive: true, force: true }); + } +}); + +test("verifyArtifacts accepts declared output files for default-repo tasks without a task branch", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "finalize-test-")); + tempDirs.push(tmpDir); + + const repoPath = path.join(tmpDir, "workspace"); + fs.mkdirSync(repoPath, { recursive: true }); + initRepo(repoPath); + + const outputPath = path.join(tmpDir, "deliverable.json"); + fs.writeFileSync(outputPath, "{\"ok\":true}\n", "utf-8"); + + const task = makeTask({ + id: "844", + description: `Salvar resultado em \`${outputPath}\`.`, + }); + const config = makeConfig(tmpDir, repoPath); + + const verification = verifyArtifacts(task, config); + + assert.equal(verification.passed, true); + assert.match(verification.reason, /declared deliverable file/i); + assert.equal(verification.evidence.some((entry) => entry.kind === "file" && entry.path === outputPath), true); +}); + +test("verifyArtifacts passes coordinator tasks (with children) without requiring a task branch or commits", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "finalize-test-")); + tempDirs.push(tmpDir); + + const repoPath = path.join(tmpDir, "workspace"); + fs.mkdirSync(repoPath, { recursive: true }); + initRepo(repoPath); + + const task = makeTask({ + id: "200", + children: ["100", "101"], + metadata: { repo: repoPath }, + }); + const config = makeConfig(tmpDir, repoPath); + + const verification = verifyArtifacts(task, config); + + assert.equal(verification.passed, true); + assert.match(verification.reason, /Coordinator task with 2 children/); + assert.match(verification.reason, /delegated to children/); +}); + +test("verifyArtifacts keeps explicit repo tasks strict when the task branch is missing", () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "finalize-test-")); + tempDirs.push(tmpDir); + + const repoPath = path.join(tmpDir, "workspace"); + fs.mkdirSync(repoPath, { recursive: true }); + initRepo(repoPath); + + const outputPath = path.join(tmpDir, "deliverable.md"); + fs.writeFileSync(outputPath, "# result\n", "utf-8"); + + const task = makeTask({ + id: "1303", + description: `Output file: \`${outputPath}\``, + metadata: { repo: repoPath }, + }); + const config = makeConfig(tmpDir, repoPath); + + const verification = verifyArtifacts(task, config); + + assert.equal(verification.passed, false); + assert.match(verification.reason, /does not exist/i); +}); + +function makeTask(overrides: Partial = {}): Task { + return { + id: "1", + title: "Test task", + description: "Test description", + parentId: null, + rootId: "1", + phase: "review", + condition: "active", + terminal: null, + currentFenceToken: 1, + leasedTo: null, + leaseExpiresAt: null, + retryAfter: null, + lastAgentExitAt: null, + attempts: { + analysis: { used: 0, max: 4 }, + decomposition: { used: 0, max: 3 }, + execution: { used: 0, max: 8 }, + review: { used: 0, max: 6 }, + }, + cost: { + allocated: 100, + consumed: 0, + childAllocated: 0, + childRecovered: 0, + }, + decompositionVersion: 0, + children: [], + checkpoints: [], + costRecoveredToParent: false, + triggeredCheckpoints: [], + completionRule: "and", + dependencies: [], + approachHistory: [], + failureSummaries: [], + failureDigestVersion: 0, + terminalSummary: null, + stateRef: null, + checkpointRefs: [], + reviewConfig: null, + reviewState: null, + sessionPolicy: "fresh", + currentSessionId: null, + contextIsolation: [], + contextBudget: 200, + waitState: null, + coordination: null, + lastCompletionVerification: null, + createdAt: Date.now(), + updatedAt: Date.now(), + metadata: {}, + ...overrides, + }; +} + +function makeConfig(tmpDir: string, defaultCodeRepo: string): Config { + return { + port: 18800, + dbPath: path.join(tmpDir, "taskcore.db"), + eventLogDir: path.join(tmpDir, "events"), + persistenceBackend: "jsonl", + agentRegistry: path.join(tmpDir, "registry.json"), + workspaceDir: tmpDir, + tickIntervalMs: 2_000, + leaseTimeoutMs: 600_000, + lockFile: path.join(tmpDir, "taskcore.lock"), + runtimeFile: path.join(tmpDir, "runtime.json"), + defaultCostBudget: 100, + defaultContextBudget: 200, + defaultAttemptBudgets: { + analysis: { max: 4 }, + decomposition: { max: 3 }, + execution: { max: 8 }, + review: { max: 6 }, + }, + disallowedAgent: "hermes", + disallowedAgentFallback: "overseer", + journalRepoPath: path.join(tmpDir, "journal"), + worktreeBaseDir: path.join(tmpDir, "worktrees"), + defaultCodeRepo, + }; +} + +function initRepo(repoPath: string): void { + execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" }); + fs.writeFileSync(path.join(repoPath, "README.md"), "# test\n", "utf-8"); + execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" }); +} From 205e3d1baa5a0832997f7974b3cb1792bcaab67a Mon Sep 17 00:00:00 2001 From: krandder Date: Wed, 11 Mar 2026 11:35:17 +0000 Subject: [PATCH 3/8] T1748: Fix attention formatter crash on non-string priority in Telegram format --- artifacts/T1748-fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 artifacts/T1748-fix.md diff --git a/artifacts/T1748-fix.md b/artifacts/T1748-fix.md new file mode 100644 index 0000000..b780a64 --- /dev/null +++ b/artifacts/T1748-fix.md @@ -0,0 +1 @@ +T1748: Fixed attention formatter crash on non-string priority in Telegram format. The fix wraps priority with String() before calling toUpperCase() in middle/http.ts attention formatter. From 20b6fb089ded662d2a4d1a1b162dfab680bf0d92 Mon Sep 17 00:00:00 2001 From: krandder Date: Fri, 13 Mar 2026 12:38:56 +0000 Subject: [PATCH 4/8] T2123: add T1710/T1712 hardening design artifacts --- ...lity-first-orchestration-hardening-plan.md | 63 ++++ ...ilure-fingerprinting-strategy-switching.md | 269 ++++++++++++++++++ 2 files changed, 332 insertions(+) create mode 100644 docs/ops/t1710-capability-first-orchestration-hardening-plan.md create mode 100644 docs/ops/t1712-failure-fingerprinting-strategy-switching.md diff --git a/docs/ops/t1710-capability-first-orchestration-hardening-plan.md b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md new file mode 100644 index 0000000..445ee6b --- /dev/null +++ b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md @@ -0,0 +1,63 @@ +# T1710 — Capability-First Orchestration Hardening Plan + +This document is the parent design index for the orchestration-hardening program. It exists to keep the child designs connected and to make implementation order explicit. + +## Program goals + +The hardening effort aims to: + +- require capability assessment before workload decomposition +- preserve partial progress through aggregate and review flows +- separate discovery from deterministic execution +- improve repeated failure handling and blocker recovery +- gate irreversible actions behind stronger approval controls + +## Design slices + +### Capability-gated decomposition + +Before decomposing work, the planner should assess whether the workflow is: + +- tool-complete +- input-complete +- approval-complete +- safe to execute incrementally + +Tasks that fail capability assessment should produce research, blocker-removal, or approval-gathering work instead of execution leaves. + +### Aggregate-safe progress preservation + +Parents and aggregate tasks should retain verified child output even when some siblings block, fail, or need replanning. Completion policy must distinguish between: + +- content completion +- infrastructure blockers +- explicit best-effort closure + +### Failure fingerprinting and strategy switching + +Repeated failures should route to shared blocker-removal work instead of consuming retries independently across sibling leaves. + +- Design artifact: [T1712 — Failure Fingerprinting, Strategy Switching, and Dynamic Retry Budgets](./t1712-failure-fingerprinting-strategy-switching.md) +- Key outputs: canonical fingerprint schema, routing scopes, blocker promotion rules, and task-kind-specific retry budgets + +### Approval-gated irreversible actions + +Irreversible or high-risk operations should only be scheduled onto explicit approval lanes with preserved context, reversible dry-run options when possible, and clear operator checkpoints. + +## Implementation order + +1. capability assessment contracts and task-kind distinctions +2. aggregate-safe completion and partial-progress policies +3. failure fingerprinting, strategy switching, and retry budgets +4. approval-gated irreversible execution lane +5. incident/reporting polish and operator-facing summaries + +## Review checklist + +Use this checklist when reviewing child designs against the parent plan: + +- Does the design reduce blind retries or unsafe execution? +- Does it preserve partial progress and evidence? +- Does it create reusable abstractions instead of workflow-specific patches? +- Does it define routing or policy behavior precisely enough to implement? +- Does it fit into the implementation order without hidden prerequisites? diff --git a/docs/ops/t1712-failure-fingerprinting-strategy-switching.md b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md new file mode 100644 index 0000000..b41a486 --- /dev/null +++ b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md @@ -0,0 +1,269 @@ +# T1712 — Failure Fingerprinting, Strategy Switching, and Dynamic Retry Budgets + +**Parent initiative:** [T1710 capability-first orchestration hardening plan](./t1710-capability-first-orchestration-hardening-plan.md) + +## Why this exists + +Repeated execution failures should not burn through retries on every sibling leaf when the underlying problem is shared. We need a canonical way to describe failures, detect when the same blocker is recurring, route that blocker to the right recovery lane, and vary retry behavior by task kind. + +This document defines: + +1. a canonical failure fingerprint schema +2. routing scopes for when a failure should stay local vs pause a wider slice of work +3. blocker-promotion rules for shared prerequisites +4. dynamic retry and budget policies by task kind and failure class +5. strategy-switching rules so the system reprioritizes recovery work instead of repeating blind execution + +## Goals + +- Detect repeated infrastructure and workflow failures across sibling tasks. +- Convert repeated blockers into shared prerequisite work when appropriate. +- Preserve useful partial progress while stopping wasteful retries. +- Apply stricter retry budgets to artifact-only and aggregate tasks than to exploratory execution. +- Produce machine-friendly artifacts that can be attached to incidents, blockers, and reviews. + +## Non-goals + +- Replacing domain-specific business logic for a given workflow. +- Fully automating root-cause analysis for every failure. +- Defining UI details for incident dashboards. + +## Canonical fingerprint schema + +Every execution, verification, or review failure that is eligible for retry should emit a normalized fingerprint. + +```json +{ + "fingerprintVersion": 1, + "class": "auth_session_failure", + "scope": "workflow", + "layer": "browser", + "operation": "checkout.submit_order", + "resource": "target:example.com", + "normalizedMessage": "session expired before submit", + "evidence": { + "httpStatus": 401, + "providerCode": "SESSION_EXPIRED", + "domMarker": "text=Sign in", + "verifier": "submit-order-check" + }, + "retryHint": "refresh_credentials", + "safetyImpact": "blocked", + "reversible": true, + "hash": "sha256:..." +} +``` + +### Required fields + +| Field | Meaning | +| --- | --- | +| `fingerprintVersion` | Schema version for future migrations. | +| `class` | Canonical failure family. | +| `scope` | Blast radius candidate: leaf, workflow, account, environment, or global. | +| `layer` | Where the failure occurred: planner, browser, API, verifier, budget, human approval, etc. | +| `operation` | Normalized operation name, not raw prompt text. | +| `resource` | Shared dependency identifier when relevant, such as account, browser target, API host, or environment. | +| `normalizedMessage` | Stable summary with volatile tokens removed. | +| `evidence` | Structured supporting facts used for dedupe and triage. | +| `retryHint` | Recommended immediate recovery lane. | +| `safetyImpact` | `none`, `degraded`, `blocked`, or `irreversible_risk`. | +| `reversible` | Whether retrying is safe without additional approval. | +| `hash` | Stable digest computed from class, scope, layer, operation, resource, and normalized evidence. | + +### Canonical classes + +| Class | Typical examples | Default scope | Default lane | +| --- | --- | --- | --- | +| `auth_session_failure` | expired login, revoked token, MFA timeout | account | blocker-removal | +| `access_denied` | anti-bot wall, permission denied, 403, geofence | workflow | blocker-removal | +| `missing_input` | missing credential, absent attachment, unanswered question | leaf | request-input | +| `invalid_state_transition` | page/app state does not match expected precondition | workflow | re-plan | +| `verification_failure` | verifier rejects output or cannot prove completion | leaf | fix-and-retry | +| `budget_exhaustion` | retries or budget consumed before stable progress | workflow | escalate-plan | +| `environment_failure` | browser crash, network outage, provider outage | environment | pause-and-recover | +| `human_approval_missing` | irreversible step waiting for human gate | workflow | hold-for-approval | + +### Normalization rules + +To avoid false uniqueness, the fingerprint hash should exclude volatile values such as: + +- timestamps +- request IDs +- random DOM IDs +- stack trace line numbers +- full URLs when only the hostname/path pattern matters +- exact screenshot filenames + +Normalization should preserve facts that matter for routing: + +- HTTP status family +- verifier name +- normalized page state markers +- workflow operation name +- shared resource identifier +- irreversible vs reversible safety posture + +## Routing scopes + +A fingerprint's `scope` determines how broadly the system should search for impacted work. + +| Scope | Meaning | Routing action | +| --- | --- | --- | +| `leaf` | Local to a single task instance | Retry or re-plan only the current leaf. | +| `workflow` | Affects sibling leaves in the same parent workflow | Pause matching siblings and create/wake shared blocker task. | +| `account` | Affects all work sharing credentials/account context | Pause tasks using same account/resource key. | +| `environment` | Affects a browser pool, host, API provider, or shared runtime | Pause all tasks bound to that environment until recovered. | +| `global` | Systemic issue or policy gate | Escalate immediately; avoid automatic retries. | + +### Routing rules + +1. **Leaf-only failures** stay local unless they repeat across siblings with the same `resource` and `operation`. +2. **Workflow/account/environment failures** should query for active tasks with the same fingerprint hash or matching `(class, resource)` pair. +3. If a shared blocker already exists for the same fingerprint family, **wake or re-open** it rather than creating duplicates. +4. If a failure carries `safetyImpact = irreversible_risk`, switch to a human approval or planning lane before any retry. + +## Blocker promotion rules + +Blocker promotion converts repeated failures into prerequisite work. + +### Promotion triggers + +Promote to blocker-removal work when any of the following is true: + +- the same fingerprint hash appears **2 times** within the same parent workflow +- the same `(class, resource)` appears across **3 or more sibling leaves** +- a single failure has `scope` of `account`, `environment`, or `global` +- an execution leaf exhausts its retry budget on a failure whose `retryHint` is not `retry_same_step` +- verifier failures indicate a shared artifact contract or infrastructure issue rather than leaf-specific content + +### Promotion output + +The promoted blocker task should capture: + +```json +{ + "kind": "blocker_removal", + "title": "Resolve auth_session_failure for target:example.com", + "fingerprintHash": "sha256:...", + "class": "auth_session_failure", + "scope": "account", + "resource": "target:example.com", + "impactedTasks": ["T2001", "T2002"], + "recoveryLane": "refresh_credentials", + "wakeOnResolution": true +} +``` + +### Promotion effects + +When promotion fires: + +1. pause or mark dependent leaves as waiting on the blocker +2. stop auto-retrying matching siblings +3. create or wake one shared blocker task +4. attach the fingerprint artifact to the blocker and impacted leaves +5. schedule follow-up only after the blocker is resolved or explicitly waived + +## Strategy switching + +Retrying is only one of several recovery strategies. The fingerprint should choose the next lane. + +| Failure class | Default next strategy | +| --- | --- | +| `auth_session_failure` | refresh credentials / re-auth before resuming leaves | +| `access_denied` | investigate anti-bot/access policy; do not retry siblings blindly | +| `missing_input` | request input or synthesize a collection subtask | +| `invalid_state_transition` | re-plan from observed state; possibly downgrade assumptions | +| `verification_failure` | inspect artifact/output and regenerate with stronger verification hints | +| `budget_exhaustion` | escalate to planner with summary of failed attempts and alternatives | +| `environment_failure` | switch browser/provider/runtime or wait for recovery | +| `human_approval_missing` | hold for approval, no automatic retries | + +### Switching guardrails + +- Never switch into an irreversible execution lane without confirming approval state. +- When the alternative strategy changes required capabilities, force a new capability assessment. +- Preserve evidence from all failed attempts so the next strategy starts with context. + +## Dynamic retry and budget policy + +Retry budgets should vary by task kind and failure class. + +### Task kinds + +| Task kind | Description | +| --- | --- | +| `execution` | Deterministic or partially deterministic action in an external system | +| `aggregate` | Gathers or merges child outputs; should preserve partial progress | +| `artifact_only` | Produces or edits docs/artifacts without external side effects | +| `blocker_removal` | Resolves shared prerequisites | +| `verification` | Confirms that a task's output meets acceptance criteria | + +### Retry budget matrix + +| Task kind | Transient infra (`environment_failure`) | Shared blocker (`auth_session_failure`, `access_denied`) | Planning/input (`missing_input`, `invalid_state_transition`) | Verification/content (`verification_failure`) | Budget exhaustion | +| --- | --- | --- | --- | --- | --- | +| `execution` | 2 immediate retries, then switch environment | 1 local retry max, then promote blocker | 1 retry after re-plan/input refresh | 1 fix-and-retry cycle | escalate immediately | +| `aggregate` | 1 retry after dependency health check | 0 direct retries; wait on blocker | re-plan once with preserved partials | 1 recompute after child repair | escalate immediately | +| `artifact_only` | 1 retry if tooling failed | 0 blocker retries unless dependency is local tooling | 2 retries with clarified prompt/spec | 2 revision cycles | escalate after second exhaustion | +| `blocker_removal` | 2 retries with alternate method/provider | 2 attempts before human escalation | 1 retry after missing prerequisite is gathered | 1 verification rerun | escalate immediately | +| `verification` | 1 rerun with fresh evidence | 0 retries; route to blocker | 1 re-verify after state refresh | 1 manual/alternate verifier pass | escalate immediately | + +### Budget principles + +- **Execution leaves** get a small number of direct retries because some failures are transient, but they should pivot quickly to blocker removal. +- **Aggregate tasks** should protect accumulated child output and avoid consuming retries on a bad dependency. +- **Artifact-only tasks** can tolerate more revision cycles because they are cheap and reversible. +- **Verification failures** should trigger stronger evidence capture rather than repeated blind checks. + +## Incident and audit artifacts + +Every promoted blocker or escalation should persist a compact artifact: + +```json +{ + "taskId": "T2001", + "fingerprintHash": "sha256:...", + "class": "verification_failure", + "scope": "leaf", + "attempt": 2, + "nextStrategy": "fix_and_retry", + "relatedBlockerTaskId": null, + "capturedAt": "2026-03-13T10:00:00Z" +} +``` + +Minimum audit payload: + +- task ID and attempt number +- fingerprint hash and canonical class +- normalized evidence +- chosen next strategy +- whether a blocker was created or reused +- why automatic retries stopped or continued + +## Acceptance criteria + +This design is implemented successfully when: + +1. repeated identical failures no longer consume blind retries across many siblings +2. shared blockers are promoted to prerequisite tasks when routing rules match +3. retry policy differs for execution, aggregate, and artifact-only tasks +4. incidents and blockers can reference stable fingerprint hashes +5. strategy switches preserve evidence and partial progress instead of restarting from scratch + +## Recommended implementation order + +1. add fingerprint schema + hashing utility +2. emit fingerprints from execution and verification failure paths +3. index active failures by `(hash, class, resource, scope)` +4. add blocker promotion and sibling pause/wake behavior +5. wire retry matrix into task-kind-specific policy evaluation +6. emit incident artifacts and reviewer-facing summaries + +## Open questions + +- Should fingerprint hashes include capability profile/version so planner changes do not over-dedupe unrelated failures? +- Do we want per-provider routing scopes in addition to environment scope? +- When a blocker is waived manually, should matching leaves resume automatically or require explicit review? From 87dc5b609e02a4424417cf9dab3fa474f386832b Mon Sep 17 00:00:00 2001 From: krandder Date: Fri, 13 Mar 2026 12:40:16 +0000 Subject: [PATCH 5/8] T2123: restore T1712 design artifacts --- .task | 12 +- ...lity-first-orchestration-hardening-plan.md | 222 +++++-- ...ilure-fingerprinting-strategy-switching.md | 608 ++++++++++++------ 3 files changed, 559 insertions(+), 283 deletions(-) diff --git a/.task b/.task index 4f33323..d3a31c8 100644 --- a/.task +++ b/.task @@ -1,10 +1,10 @@ { - "taskId": "1282", + "taskId": "2123", "phase": "execution", - "fenceToken": 3, - "sessionId": "c893aa20-a7b7-4112-9cc3-68c12a747bf1", - "journalPath": "/tmp/taskcore-worktrees/journal-T1282/tasks/T1282/", - "codeWorktree": "/tmp/taskcore-worktrees/code-T1282", - "claimedAt": 1773098423616, + "fenceToken": 10, + "sessionId": "7be3c6a7-358e-45c7-b5af-6c1c5a51b82e", + "journalPath": "/tmp/taskcore-worktrees/journal-T2123/tasks/T2123/", + "codeWorktree": "/tmp/taskcore-worktrees/code-T2123", + "claimedAt": 1773404837466, "reviewNotes": [] } diff --git a/docs/ops/t1710-capability-first-orchestration-hardening-plan.md b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md index 445ee6b..699d221 100644 --- a/docs/ops/t1710-capability-first-orchestration-hardening-plan.md +++ b/docs/ops/t1710-capability-first-orchestration-hardening-plan.md @@ -1,63 +1,159 @@ -# T1710 — Capability-First Orchestration Hardening Plan - -This document is the parent design index for the orchestration-hardening program. It exists to keep the child designs connected and to make implementation order explicit. - -## Program goals - -The hardening effort aims to: - -- require capability assessment before workload decomposition -- preserve partial progress through aggregate and review flows -- separate discovery from deterministic execution -- improve repeated failure handling and blocker recovery -- gate irreversible actions behind stronger approval controls - -## Design slices - -### Capability-gated decomposition - -Before decomposing work, the planner should assess whether the workflow is: - -- tool-complete -- input-complete -- approval-complete -- safe to execute incrementally - -Tasks that fail capability assessment should produce research, blocker-removal, or approval-gathering work instead of execution leaves. - -### Aggregate-safe progress preservation - -Parents and aggregate tasks should retain verified child output even when some siblings block, fail, or need replanning. Completion policy must distinguish between: - -- content completion -- infrastructure blockers -- explicit best-effort closure - -### Failure fingerprinting and strategy switching - -Repeated failures should route to shared blocker-removal work instead of consuming retries independently across sibling leaves. - -- Design artifact: [T1712 — Failure Fingerprinting, Strategy Switching, and Dynamic Retry Budgets](./t1712-failure-fingerprinting-strategy-switching.md) -- Key outputs: canonical fingerprint schema, routing scopes, blocker promotion rules, and task-kind-specific retry budgets - -### Approval-gated irreversible actions - -Irreversible or high-risk operations should only be scheduled onto explicit approval lanes with preserved context, reversible dry-run options when possible, and clear operator checkpoints. - -## Implementation order - -1. capability assessment contracts and task-kind distinctions -2. aggregate-safe completion and partial-progress policies -3. failure fingerprinting, strategy switching, and retry budgets -4. approval-gated irreversible execution lane -5. incident/reporting polish and operator-facing summaries - -## Review checklist - -Use this checklist when reviewing child designs against the parent plan: - -- Does the design reduce blind retries or unsafe execution? -- Does it preserve partial progress and evidence? -- Does it create reusable abstractions instead of workflow-specific patches? -- Does it define routing or policy behavior precisely enough to implement? -- Does it fit into the implementation order without hidden prerequisites? +# T1710 — Capability-First Orchestration Hardening + +## Executive recommendation + +This program is **too large and cross-cutting to execute as a single task**. It should be decomposed into ordered child tasks with one parent artifact that fixes the architecture, sequencing, and acceptance criteria. + +The failure pattern is not "one bug". It is a systems problem caused by trying to decompose and execute uncertain missions **before** the system knows: +- what capabilities are actually available, +- which prerequisites are missing, +- which steps are reversible vs. irreversible, +- how partial progress should be preserved, +- when repeated failure should trigger a strategy change instead of more retries. + +## Problem framing + +The target workflow class has five characteristics: +1. **High uncertainty at start** — entity matching, environment state, or account status may be unknown. +2. **Browser-mediated execution** — progress depends on live UI state, auth, and fragile selectors. +3. **Partially irreversible actions** — clicks, submissions, trades, messages, or confirmations may have consequences. +4. **Mixed research + execution** — discovery work is often bundled with deterministic action steps. +5. **Infrastructure noise** — browser relay, auth state, tool health, and mutation-path failures create false task churn. + +If the orchestrator treats these as ordinary deterministic tasks, it creates the same failure loop: +- decompose too early, +- assign execution before readiness, +- lose partial findings in review/aggregate handoffs, +- retry the same failing path, +- escalate risk near irreversible steps. + +## Strategic design principles + +### 1. Capability-first before decomposition +Before generating child tasks, the system should produce a **mission capability snapshot**: +- available tools and runtimes, +- authenticated systems/accounts, +- browser availability and attachment state, +- permission constraints, +- verification channels, +- human-approval requirements, +- known blockers. + +If readiness is low, the system should create **prerequisite tasks** first, not execution tasks. + +### 2. Separate discovery from deterministic execution +A mission should not begin with an execution plan when the main unknown is still identification, feasibility, auth, or state verification. + +Use two lanes: +- **Discovery lane**: identify entities, inspect environment, map options, and gather evidence. +- **Execution lane**: perform deterministic, validated steps only after inputs are stable. + +This preserves operator clarity and reduces bogus "execution failures" that are really unresolved discovery problems. + +### 3. Preserve partial progress as first-class artifacts +When a child task uncovers verified facts but cannot finish the end-to-end mission, that output must survive reviews, retries, and replanning. + +Required artifact types: +- capability snapshots, +- matched entities / rejected candidates, +- prerequisite checklist state, +- evidence bundles, +- environment fingerprints, +- execution-ready plans, +- approval packets for irreversible actions. + +The parent should aggregate these artifacts instead of forcing children into a binary success/fail shape. + +### 4. Fingerprint failure modes, then switch strategy +Repeated retries are only rational if the failure mode is transient. The orchestrator should classify failures into buckets such as: +- auth missing/expired, +- browser relay unattached, +- selector / UI drift, +- external system ambiguity, +- runtime/tool unavailable, +- mutation accepted but verification unavailable, +- approval required but not granted. + +Each class needs a defined next action: retry, reroute, decompose prerequisite, request approval, or stop. + +### 5. Approval-gated lane for irreversible actions +Irreversible or safety-sensitive actions should require a specific lane with: +- explicit action summary, +- preconditions satisfied, +- target/entity verified, +- rollback possibilities documented, +- approval token or human confirmation captured, +- post-action verification defined. + +This should not share the same semantics as low-risk research tasks. + +## Proposed decomposition + +### Child 1 — Mission capability registry and readiness model +**Goal:** define machine-readable representation of capabilities, prerequisites, and readiness scoring. +**Output:** schema + readiness levels + examples + integration points. + +### Child 2 — Execution preflight and prerequisite detection +**Goal:** build the gate that runs before decomposition/execution to detect missing auth, tools, browser state, permissions, and verification channels. +**Output:** preflight rules, fail-fast decisions, prerequisite task generation rules. + +### Child 3 — Grounding and uncertain-entity evaluation framework +**Goal:** handle missions where the target entity, account, page, or record is uncertain. +**Output:** match confidence model, evidence requirements, safe stopping conditions. + +### Child 4 — Failure fingerprinting and strategy switching +**Goal:** stop naive retry loops and route repeated failures to the right next strategy. +**Output:** failure taxonomy, retry budgets, switching rules, observability requirements. +**Design artifact:** `docs/ops/t1712-failure-fingerprinting-strategy-switching.md` + +### Child 5 — Approval-gated irreversible-action lane +**Goal:** create a separate workflow for steps with material consequences. +**Output:** approval packet schema, gate conditions, execution/verification semantics. + +### Child 6 — Aggregate policy and artifact-first closure semantics +**Goal:** preserve partial progress through review and parent aggregation. +**Output:** child completion semantics, artifact contract, parent merge rules, review checklist. + +### Child 7 — Cross-repo implementation and validation plan +**Goal:** map where changes belong across taskcore and colony, sequence rollout, and define tests. +**Output:** implementation order, repo ownership, migration plan, acceptance tests. + +## Ordering recommendation + +Recommended execution order: +1. Capability registry and readiness model +2. Execution preflight and prerequisite detection +3. Grounding / uncertain-entity evaluation +4. Failure fingerprinting and strategy switching +5. Approval-gated lane for irreversible actions +6. Aggregate policy and artifact-first closure semantics +7. Cross-repo implementation and validation plan + +Rationale: +- readiness and preflight are foundation layers, +- grounding determines whether execution should even begin, +- failure handling is only useful after readiness semantics exist, +- approval gating depends on stable preconditions and verification semantics, +- aggregate closure should be shaped after artifact types are defined, +- implementation planning should be last so it reflects the final architecture. + +## Acceptance criteria for the parent task + +T1710 should only be considered complete when it produces: +- a parent architecture memo with the final system model, +- child tasks covering all six functional areas plus rollout/validation, +- explicit artifact contracts between children, +- a recommended order of implementation, +- concrete acceptance tests for the integrated workflow. + +## What not to do + +- Do **not** patch a single historical workflow. +- Do **not** encode browser-specific hacks as general orchestration policy. +- Do **not** collapse discovery, execution, and approval into one task type. +- Do **not** use success/failure alone as the parent aggregation model. +- Do **not** allow irreversible execution without explicit preconditions and approval semantics. + +## Immediate next move + +Decompose T1710 into the ordered child tasks above, using domain-agnostic language and artifact-focused outputs. The parent remains responsible for the integrated architecture and rollout sequence. diff --git a/docs/ops/t1712-failure-fingerprinting-strategy-switching.md b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md index b41a486..576c279 100644 --- a/docs/ops/t1712-failure-fingerprinting-strategy-switching.md +++ b/docs/ops/t1712-failure-fingerprinting-strategy-switching.md @@ -1,269 +1,449 @@ -# T1712 — Failure Fingerprinting, Strategy Switching, and Dynamic Retry Budgets +# T1712 — Failure fingerprinting, strategy switching, and dynamic retry budgets -**Parent initiative:** [T1710 capability-first orchestration hardening plan](./t1710-capability-first-orchestration-hardening-plan.md) +## Problem statement -## Why this exists +TaskCore currently treats most agent failures as variations of the same event: +- `task-executor.mjs` increments a single `retryCount` +- work is re-queued with generic exponential backoff +- only rate limits get a distinct path +- once `MAX_RETRIES` is exhausted, the task is simply blocked -Repeated execution failures should not burn through retries on every sibling leaf when the underlying problem is shared. We need a canonical way to describe failures, detect when the same blocker is recurring, route that blocker to the right recovery lane, and vary retry behavior by task kind. +That is too lossy for uncertain, browser-mediated, or dependency-heavy work. The system cannot distinguish: +- a stale login that should wake a shared auth blocker +- anti-bot / access denial that should halt sibling work on the same target +- missing inputs that should create a prerequisite task instead of more retries +- invalid state transitions that require replanning, not repetition +- verification failures after a mutation, where autonomy should stop and escalate +- repeated cost / provider exhaustion, where dispatch should shift strategy globally -This document defines: +The result is sibling churn: multiple leaves burn retries on the same blocker even when the next rational move is shared blocker-removal or mission replanning. -1. a canonical failure fingerprint schema -2. routing scopes for when a failure should stay local vs pause a wider slice of work -3. blocker-promotion rules for shared prerequisites -4. dynamic retry and budget policies by task kind and failure class -5. strategy-switching rules so the system reprioritizes recovery work instead of repeating blind execution +--- -## Goals +## Design goals -- Detect repeated infrastructure and workflow failures across sibling tasks. -- Convert repeated blockers into shared prerequisite work when appropriate. -- Preserve useful partial progress while stopping wasteful retries. -- Apply stricter retry budgets to artifact-only and aggregate tasks than to exploratory execution. -- Produce machine-friendly artifacts that can be attached to incidents, blockers, and reviews. +1. **Canonical failure identity** — repeated failures with the same operative cause produce the same fingerprint. +2. **Scope-aware routing** — leaf-local failures stay local; shared blockers fan in to one prerequisite task; global exhaustion triggers broader throttling. +3. **Strategy switching over blind retries** — retry only when the failure class is plausibly transient. +4. **Task-kind-aware budgets** — execution, aggregate, and artifact-only tasks do not share the same retry policy. +5. **Inspectable decisions** — every retry, pause, reroute, and blocker promotion is visible in task metadata and executor outcome logs. +6. **Fail-closed near irreversible work** — verification and state-transition failures on execution tasks escalate before more automation is attempted. -## Non-goals +### Non-goals -- Replacing domain-specific business logic for a given workflow. -- Fully automating root-cause analysis for every failure. -- Defining UI details for incident dashboards. +- Replacing provider allocation gating from T755. +- Replacing the recovery breaker engine for host/service remediation. +- Solving target grounding/entity ambiguity (that belongs to T1704/T2110-style grounding work). -## Canonical fingerprint schema +--- -Every execution, verification, or review failure that is eligible for retry should emit a normalized fingerprint. +## Where this policy plugs in + +Primary integration points: +1. **`scripts/task-executor.mjs`** — authoritative classification, retry budgeting, and routing. +2. **task metadata in `.taskmaster/tasks/tasks.json`** — persistent fingerprint, counters, blocker linkage, and route decisions. +3. **executor outcome log** (`data/task-dashboard/executor_outcomes.jsonl`) — append fingerprint + routing evidence. +4. **dashboard export / APIs** — surface repeated failure clusters, promoted blockers, and retry-budget exhaustion. + +This task defines the contract and routing model. A later execution task should implement the plumbing. + +--- + +## 1) Canonical failure fingerprint model + +A **failure fingerprint** is the deduplicated identity of a task failure for orchestration purposes. + +### Proposed schema ```json { - "fingerprintVersion": 1, - "class": "auth_session_failure", - "scope": "workflow", - "layer": "browser", - "operation": "checkout.submit_order", - "resource": "target:example.com", - "normalizedMessage": "session expired before submit", - "evidence": { - "httpStatus": 401, - "providerCode": "SESSION_EXPIRED", - "domMarker": "text=Sign in", - "verifier": "submit-order-check" + "version": "v1", + "fingerprintId": "ffp_01HV...", + "taskId": 1712, + "taskKind": "execution", + "runPhase": "work", + "failureClass": "auth_session_failure", + "scope": "shared_prerequisite", + "resourceKey": "telegram:account:primary", + "surface": "browser_relay", + "reasonCode": "session_expired", + "signature": { + "provider": "openai-codex", + "model": "gpt-5.4", + "exitType": "agent_crash", + "stderrClass": "login_required", + "targetRef": "telegram-web" }, - "retryHint": "refresh_credentials", - "safetyImpact": "blocked", - "reversible": true, - "hash": "sha256:..." + "dedupeKey": "auth_session_failure|telegram:account:primary|browser_relay|session_expired", + "firstSeenAt": "2026-03-13T09:00:00Z", + "lastSeenAt": "2026-03-13T09:07:00Z", + "attempts": 3, + "affectedTaskIds": [1712, 1718, 1721], + "recommendedStrategy": "wake_or_create_blocker", + "recommendedBlockerKey": "blocker:auth:telegram:account:primary" } ``` ### Required fields | Field | Meaning | -| --- | --- | -| `fingerprintVersion` | Schema version for future migrations. | -| `class` | Canonical failure family. | -| `scope` | Blast radius candidate: leaf, workflow, account, environment, or global. | -| `layer` | Where the failure occurred: planner, browser, API, verifier, budget, human approval, etc. | -| `operation` | Normalized operation name, not raw prompt text. | -| `resource` | Shared dependency identifier when relevant, such as account, browser target, API host, or environment. | -| `normalizedMessage` | Stable summary with volatile tokens removed. | -| `evidence` | Structured supporting facts used for dedupe and triage. | -| `retryHint` | Recommended immediate recovery lane. | -| `safetyImpact` | `none`, `degraded`, `blocked`, or `irreversible_risk`. | -| `reversible` | Whether retrying is safe without additional approval. | -| `hash` | Stable digest computed from class, scope, layer, operation, resource, and normalized evidence. | - -### Canonical classes - -| Class | Typical examples | Default scope | Default lane | -| --- | --- | --- | --- | -| `auth_session_failure` | expired login, revoked token, MFA timeout | account | blocker-removal | -| `access_denied` | anti-bot wall, permission denied, 403, geofence | workflow | blocker-removal | -| `missing_input` | missing credential, absent attachment, unanswered question | leaf | request-input | -| `invalid_state_transition` | page/app state does not match expected precondition | workflow | re-plan | -| `verification_failure` | verifier rejects output or cannot prove completion | leaf | fix-and-retry | -| `budget_exhaustion` | retries or budget consumed before stable progress | workflow | escalate-plan | -| `environment_failure` | browser crash, network outage, provider outage | environment | pause-and-recover | -| `human_approval_missing` | irreversible step waiting for human gate | workflow | hold-for-approval | - -### Normalization rules - -To avoid false uniqueness, the fingerprint hash should exclude volatile values such as: - -- timestamps -- request IDs -- random DOM IDs -- stack trace line numbers -- full URLs when only the hostname/path pattern matters -- exact screenshot filenames - -Normalization should preserve facts that matter for routing: - -- HTTP status family -- verifier name -- normalized page state markers -- workflow operation name -- shared resource identifier -- irreversible vs reversible safety posture - -## Routing scopes - -A fingerprint's `scope` determines how broadly the system should search for impacted work. - -| Scope | Meaning | Routing action | -| --- | --- | --- | -| `leaf` | Local to a single task instance | Retry or re-plan only the current leaf. | -| `workflow` | Affects sibling leaves in the same parent workflow | Pause matching siblings and create/wake shared blocker task. | -| `account` | Affects all work sharing credentials/account context | Pause tasks using same account/resource key. | -| `environment` | Affects a browser pool, host, API provider, or shared runtime | Pause all tasks bound to that environment until recovered. | -| `global` | Systemic issue or policy gate | Escalate immediately; avoid automatic retries. | - -### Routing rules - -1. **Leaf-only failures** stay local unless they repeat across siblings with the same `resource` and `operation`. -2. **Workflow/account/environment failures** should query for active tasks with the same fingerprint hash or matching `(class, resource)` pair. -3. If a shared blocker already exists for the same fingerprint family, **wake or re-open** it rather than creating duplicates. -4. If a failure carries `safetyImpact = irreversible_risk`, switch to a human approval or planning lane before any retry. - -## Blocker promotion rules - -Blocker promotion converts repeated failures into prerequisite work. - -### Promotion triggers - -Promote to blocker-removal work when any of the following is true: - -- the same fingerprint hash appears **2 times** within the same parent workflow -- the same `(class, resource)` appears across **3 or more sibling leaves** -- a single failure has `scope` of `account`, `environment`, or `global` -- an execution leaf exhausts its retry budget on a failure whose `retryHint` is not `retry_same_step` -- verifier failures indicate a shared artifact contract or infrastructure issue rather than leaf-specific content - -### Promotion output - -The promoted blocker task should capture: +|---|---| +| `taskKind` | `execution`, `aggregate`, `artifact_only`, `review`, `capability_probe`, etc. | +| `runPhase` | `work` or `review` | +| `failureClass` | canonical orchestrator-facing class | +| `scope` | `leaf_local`, `shared_prerequisite`, or `global_budget` | +| `resourceKey` | normalized target of the blocker (`browser:relay:chrome`, `human:kas`, `provider:openai-codex`) | +| `reasonCode` | finer-grained sub-cause | +| `dedupeKey` | stable routing key used to collapse repeats | +| `attempts` | count of repeated hits within policy window | +| `recommendedStrategy` | `retry_same_path`, `switch_strategy`, `wake_or_create_blocker`, `pause_for_review`, `global_throttle` | -```json -{ - "kind": "blocker_removal", - "title": "Resolve auth_session_failure for target:example.com", - "fingerprintHash": "sha256:...", - "class": "auth_session_failure", - "scope": "account", - "resource": "target:example.com", - "impactedTasks": ["T2001", "T2002"], - "recoveryLane": "refresh_credentials", - "wakeOnResolution": true -} +### Scope semantics + +- **`leaf_local`** — retry/replan only this task. Example: malformed prompt for one artifact-only task. +- **`shared_prerequisite`** — stop retrying sibling leaves and promote a shared blocker. Example: expired auth, missing credential, inaccessible website. +- **`global_budget`** — provider/cost saturation or broad platform outage; gate future dispatch and avoid local churn. + +--- + +## 2) Canonical failure classes + +These are the minimum classes required for T1712 acceptance. + +| Failure class | Typical signals | Default scope | Default strategy | +|---|---|---|---| +| `auth_session_failure` | login required, expired cookie/session, wallet disconnected, missing permission grant | `shared_prerequisite` | pause affected leaves, wake/create auth blocker | +| `access_denial_antibot` | captcha, 403, antibot page, WAF deny, account challenge | `shared_prerequisite` | stop automation path, request human unblock / alternate channel | +| `missing_input` | required ID/file/approval/parameter absent | `shared_prerequisite` if shared, else `leaf_local` | create prerequisite task or request user input | +| `invalid_state_transition` | task tries action from wrong state, precondition invalid, already-submitted/closed/cancelled | `leaf_local` or `shared_prerequisite` if state is mission-wide | replan from refreshed state; no same-path retry | +| `verification_failure` | mutation possibly happened but postcondition cannot be proven; conflicting checks | `leaf_local` on single leaf, fail-closed for execution | halt autonomy, require verification/review path | +| `cost_exhaustion_repeated` | provider denied for budget/quota reasons across attempts/windows | `global_budget` | throttle dispatch, shift provider/model/priority policy | + +Recommended additional classes for implementation completeness: +- `tool_runtime_unavailable` +- `ui_selector_drift` +- `rate_limit_transient` +- `dependency_blocked` +- `human_approval_missing` + +### Failure-class notes + +#### `auth_session_failure` +Examples: +- browser relay attached but session logged out +- API token missing or expired +- wallet connector present but no connected account + +Rule: after the second matching hit within the policy window, stop local retries and create/wake one shared auth blocker keyed by the affected account/resource. + +#### `access_denial_antibot` +Rule: never let sibling leaves keep probing the same blocked surface. Switch to a human-assisted or alternate-channel strategy immediately after first confirmed match. + +#### `missing_input` +Rule: if the missing input is shared by multiple children (e.g. target account id, approval token, attachment), collapse it into one prerequisite task and mark dependent leaves as waiting/blocked-by-dependency. + +#### `invalid_state_transition` +Rule: do not spend retry budget repeating an action against a stale assumption. Refresh state, then either replan or close as not-applicable. + +#### `verification_failure` +Rule: for execution tasks, verification failure after a mutation is **not retry-equivalent** to a normal crash. The system must stop and request human review or explicit verification work. + +#### `cost_exhaustion_repeated` +Rule: once the same quota/budget fingerprint repeats across tasks or time windows, it becomes a dispatch-policy problem, not a leaf problem. Route to global throttling or provider switch. + +--- + +## 3) Fingerprint derivation rules + +The executor should derive a fingerprint in four passes: + +1. **Normalize runtime facts** + - task kind + - run phase + - assignee/reviewer + - exit code / signal + - error tail classification + - known provider/model metadata + - target resource / surface + +2. **Assign failure class** + - use deterministic rule table before any model-based classifier + - allow only a bounded fallback classifier for unknown cases + +3. **Resolve routing scope** + - infer whether the cause is leaf-local, shared prerequisite, or global budget + +4. **Build dedupe key** + - `failureClass | resourceKey | surface | reasonCode` + - exclude volatile text (timestamps, raw stack traces, run ids) + +### Example derivations + +```text +stderr: "Telegram Web shows login required" +=> failureClass=auth_session_failure +=> resourceKey=telegram:web:primary +=> scope=shared_prerequisite +=> dedupeKey=auth_session_failure|telegram:web:primary|browser_relay|login_required ``` -### Promotion effects +```text +stderr: "429 Too Many Requests from provider openai-codex" +=> failureClass=rate_limit_transient +=> resourceKey=provider:openai-codex +=> scope=leaf_local (single task) OR global_budget once repeated threshold trips +=> dedupeKey=rate_limit_transient|provider:openai-codex|dispatch|429 +``` -When promotion fires: +```text +stderr: "proposal already published" +=> failureClass=invalid_state_transition +=> resourceKey=proposal:1234 +=> scope=shared_prerequisite if many leaves assume draft state +=> dedupeKey=invalid_state_transition|proposal:1234|mutation_path|already_published +``` -1. pause or mark dependent leaves as waiting on the blocker -2. stop auto-retrying matching siblings -3. create or wake one shared blocker task -4. attach the fingerprint artifact to the blocker and impacted leaves -5. schedule follow-up only after the blocker is resolved or explicitly waived +--- -## Strategy switching +## 4) Strategy-switching rules -Retrying is only one of several recovery strategies. The fingerprint should choose the next lane. +### Canonical strategies -| Failure class | Default next strategy | -| --- | --- | -| `auth_session_failure` | refresh credentials / re-auth before resuming leaves | -| `access_denied` | investigate anti-bot/access policy; do not retry siblings blindly | -| `missing_input` | request input or synthesize a collection subtask | -| `invalid_state_transition` | re-plan from observed state; possibly downgrade assumptions | -| `verification_failure` | inspect artifact/output and regenerate with stronger verification hints | -| `budget_exhaustion` | escalate to planner with summary of failed attempts and alternatives | -| `environment_failure` | switch browser/provider/runtime or wait for recovery | -| `human_approval_missing` | hold for approval, no automatic retries | +| Strategy | Use when | Result | +|---|---|---| +| `retry_same_path` | transient/local issue and retry budget remains | requeue same task with backoff | +| `switch_strategy` | same objective still valid but current path is irrational | reroute to alternate tool/channel/plan | +| `wake_or_create_blocker` | repeated shared blocker across leaves | create or wake one prerequisite task and pause dependents | +| `pause_for_review` | verification or safety-sensitive ambiguity | send to review / human confirmation | +| `global_throttle` | provider or budget exhaustion spans multiple tasks | deny/defer future dispatch until healthy | -### Switching guardrails +### Routing decision table -- Never switch into an irreversible execution lane without confirming approval state. -- When the alternative strategy changes required capabilities, force a new capability assessment. -- Preserve evidence from all failed attempts so the next strategy starts with context. +| Failure class | First hit | Repeated hit | Exhausted state | +|---|---|---|---| +| `auth_session_failure` | retry once if evidence is weak; otherwise create blocker immediately | wake/create shared auth blocker; pause sibling leaves | mark dependency blocker and stop automation until prerequisite closes | +| `access_denial_antibot` | stop same-path retries; request alternate route | shared blocker + human review | quarantine target surface for cooldown window | +| `missing_input` | create/wake prerequisite or ask for input | collapse siblings onto same blocker | leave waiting on prerequisite, no further retries | +| `invalid_state_transition` | refresh state and re-evaluate | replan or close as superseded | no further retries on stale path | +| `verification_failure` | require explicit verification task / review | block execution lane on target | escalate to human with evidence bundle | +| `cost_exhaustion_repeated` | apply local defer/backoff | trigger provider/model/policy switch or dispatch gate deny | global throttle until healthy window returns | -## Dynamic retry and budget policy +### Shared blocker promotion rule -Retry budgets should vary by task kind and failure class. +When all of the following hold, the executor promotes a blocker task: +1. `scope == shared_prerequisite` +2. same `dedupeKey` occurs on **>= 2 tasks** or **>= 2 attempts on one task** within the policy window +3. a blocker with the same `recommendedBlockerKey` is not already active -### Task kinds +Result: +- create or wake one blocker task +- attach `metadata.failureFingerprint.blockerTaskId` +- mark affected leaves as dependency-blocked / waiting on that blocker +- suppress additional same-fingerprint retries until blocker state changes -| Task kind | Description | -| --- | --- | -| `execution` | Deterministic or partially deterministic action in an external system | -| `aggregate` | Gathers or merges child outputs; should preserve partial progress | -| `artifact_only` | Produces or edits docs/artifacts without external side effects | -| `blocker_removal` | Resolves shared prerequisites | -| `verification` | Confirms that a task's output meets acceptance criteria | +### Replanning rule -### Retry budget matrix +If a task hits `invalid_state_transition` or `verification_failure`, the next action should be a replan/verification step, not another leaf retry. The executor should either: +- create a child task for state refresh / verification, or +- send the task back to review with the fingerprint attached. -| Task kind | Transient infra (`environment_failure`) | Shared blocker (`auth_session_failure`, `access_denied`) | Planning/input (`missing_input`, `invalid_state_transition`) | Verification/content (`verification_failure`) | Budget exhaustion | -| --- | --- | --- | --- | --- | --- | -| `execution` | 2 immediate retries, then switch environment | 1 local retry max, then promote blocker | 1 retry after re-plan/input refresh | 1 fix-and-retry cycle | escalate immediately | -| `aggregate` | 1 retry after dependency health check | 0 direct retries; wait on blocker | re-plan once with preserved partials | 1 recompute after child repair | escalate immediately | -| `artifact_only` | 1 retry if tooling failed | 0 blocker retries unless dependency is local tooling | 2 retries with clarified prompt/spec | 2 revision cycles | escalate after second exhaustion | -| `blocker_removal` | 2 retries with alternate method/provider | 2 attempts before human escalation | 1 retry after missing prerequisite is gathered | 1 verification rerun | escalate immediately | -| `verification` | 1 rerun with fresh evidence | 0 retries; route to blocker | 1 re-verify after state refresh | 1 manual/alternate verifier pass | escalate immediately | +--- -### Budget principles +## 5) Dynamic retry budget policy -- **Execution leaves** get a small number of direct retries because some failures are transient, but they should pivot quickly to blocker removal. -- **Aggregate tasks** should protect accumulated child output and avoid consuming retries on a bad dependency. -- **Artifact-only tasks** can tolerate more revision cycles because they are cheap and reversible. -- **Verification failures** should trigger stronger evidence capture rather than repeated blind checks. +Retry budgets must be keyed by **task kind** and **failure class**, not one global `MAX_RETRIES`. -## Incident and audit artifacts +### Policy table (recommended v1) -Every promoted blocker or escalation should persist a compact artifact: +| Task kind | Failure class | Auto retries | Backoff class | On exhaustion | +|---|---|---:|---|---| +| `execution` | `rate_limit_transient` | 2 | long | switch provider or defer via gate | +| `execution` | `auth_session_failure` | 1 | short | create/wake auth blocker | +| `execution` | `access_denial_antibot` | 0 | none | human unblock / alternate path | +| `execution` | `missing_input` | 0 | none | prerequisite task | +| `execution` | `invalid_state_transition` | 0 | none | refresh + replan | +| `execution` | `verification_failure` | 0 | none | review / verification task | +| `execution` | `tool_runtime_unavailable` | 1 | medium | reroute tool / pause | +| `aggregate` | `dependency_blocked` | 0 | none | wait for required children / blocker | +| `aggregate` | `missing_input` | 0 | none | request missing artifact coverage | +| `aggregate` | `tool_runtime_unavailable` | 1 | short | rerun reducer/export path | +| `aggregate` | `verification_failure` | 1 | short | review aggregate evidence | +| `artifact_only` | `tool_runtime_unavailable` | 2 | short | reroute agent/tool | +| `artifact_only` | `missing_input` | 0 | none | request source material | +| `artifact_only` | `invalid_state_transition` | 0 | none | usually close/supersede, not retry | +| `artifact_only` | `cost_exhaustion_repeated` | 1 | long | defer until budget recovers | +| `review` | `tool_runtime_unavailable` | 1 | short | reroute reviewer | +| `review` | `verification_failure` | 0 | none | escalate to human reviewer | + +### Why the policies differ + +- **Execution tasks** carry the most risk; most non-transient classes should not auto-retry. +- **Aggregate tasks** should rarely retry; repeated child blockers are usually dependency issues, not execution failures. +- **Artifact-only tasks** can tolerate slightly more retry on tooling failures because they do not directly mutate external state. + +### Budget accounting model + +Track two counters per task attempt window: +1. **`pathRetryCount`** — retries on the same strategy/path +2. **`strategySwitchCount`** — number of alternate paths already tried + +This prevents a task from escaping budget control by bouncing endlessly between weak alternatives. + +Recommended defaults: +- `execution`: `pathRetryCount <= 2`, `strategySwitchCount <= 1` +- `aggregate`: `pathRetryCount <= 1`, `strategySwitchCount <= 1` +- `artifact_only`: `pathRetryCount <= 2`, `strategySwitchCount <= 2` + +--- + +## 6) Proposed metadata contract + +Add the following metadata shape to task records: ```json { - "taskId": "T2001", - "fingerprintHash": "sha256:...", - "class": "verification_failure", - "scope": "leaf", - "attempt": 2, - "nextStrategy": "fix_and_retry", - "relatedBlockerTaskId": null, - "capturedAt": "2026-03-13T10:00:00Z" + "metadata": { + "retryPolicy": { + "taskKind": "execution", + "pathRetryCount": 1, + "strategySwitchCount": 0, + "budgetWindow": "30m", + "lastBudgetDecision": "wake_or_create_blocker" + }, + "failureFingerprint": { + "fingerprintId": "ffp_01HV...", + "failureClass": "auth_session_failure", + "scope": "shared_prerequisite", + "dedupeKey": "auth_session_failure|telegram:web:primary|browser_relay|login_required", + "attemptsWindow": 2, + "firstSeenAt": "2026-03-13T09:00:00Z", + "lastSeenAt": "2026-03-13T09:07:00Z", + "recommendedStrategy": "wake_or_create_blocker", + "blockerTaskId": 1730 + }, + "blockedByFingerprint": true, + "sharedBlockerKey": "blocker:auth:telegram:web:primary" + } } ``` -Minimum audit payload: +### Executor outcome log extension + +Each `executor_outcomes.jsonl` record should append: +- `taskKind` +- `failureClass` +- `fingerprintDedupeKey` +- `routingDecision` +- `scope` +- `strategySwitched` (bool) +- `blockerTaskId` (if any) + +This is the minimum observability needed to prove sibling churn actually dropped after rollout. + +--- + +## 7) Shared blocker lifecycle + +### Blocker task creation contract + +When promoting shared blocker work, the system should create a task with: +- kind: `capability_probe` or `blocker_removal` +- title: deterministic and resource-based +- description: include fingerprint class, affected resource, and evidence bundle +- metadata: + - `blockerKey` + - `sourceFingerprint` + - `affectedTaskIds` + - `createdFromFailureRouter=true` + +### Wake vs create + +- **Wake existing blocker** when an active/pending blocker has the same `blockerKey`. +- **Create new blocker** only when no active blocker exists. + +### Leaf behavior while blocker active + +Affected leaves should not continue ordinary retry scheduling. Instead they should move to a dependency-held condition with: +- blocker task id +- blocker key +- fingerprint id +- timestamp of last routed match + +This is the mechanism that stops sibling retry burn. + +--- + +## 8) Observability and dashboards + +Required dashboard/reporting surfaces: +1. **Top repeated fingerprints** in the last 24h +2. **Shared blocker promotions** and number of leaves collapsed behind each blocker +3. **Retries avoided** after blocker promotion +4. **Failure-class breakdown by task kind** +5. **Global budget throttles** with provider/model linkage + +Key success metrics: +- drop in repeated identical failures per sibling set +- increase in blocker reuse rate +- reduction in tasks blocked only after max retries +- fewer execution tasks auto-retrying on verification failures + +--- + +## 9) Acceptance tests for the implementation task + +### A. Shared auth blocker +1. Two execution leaves fail with the same expired-session fingerprint. +2. System creates/wakes one auth blocker. +3. Sibling leaves stop consuming retries. +4. Both leaves reference the same blocker task id/key. + +### B. Anti-bot/access denial +1. First confirmed anti-bot fingerprint occurs on an execution task. +2. Executor does **not** requeue same path. +3. Alternate route or human unblock task is requested. + +### C. Missing shared input +1. Multiple tasks lack the same approval token / file. +2. One prerequisite task is created. +3. Additional failures attach to existing blocker instead of creating duplicates. + +### D. Invalid state transition +1. Execution task tries to mutate an already-finalized resource. +2. No same-path retry occurs. +3. Task routes to refresh/replan or closes as superseded. + +### E. Verification failure +1. Mutation step appears to succeed but verification check is inconclusive. +2. Task does not spend standard retry budget. +3. Task escalates to review/verification work. + +### F. Dynamic budgets by task kind +1. `artifact_only` task with tool outage gets >0 retries. +2. `aggregate` task with dependency blocker gets 0 same-path retries. +3. `execution` task with auth failure gets <=1 retry before blocker promotion. + +### G. Repeated cost exhaustion +1. Same provider quota exhaustion hits multiple tasks in window. +2. A global throttle / provider-switch path is activated. +3. Future dispatch is deferred rather than burning leaf retries. -- task ID and attempt number -- fingerprint hash and canonical class -- normalized evidence -- chosen next strategy -- whether a blocker was created or reused -- why automatic retries stopped or continued +--- -## Acceptance criteria +## 10) Implementation order recommendation -This design is implemented successfully when: +1. Add fingerprint schema + deterministic classifier in executor. +2. Persist metadata and outcome-log extensions. +3. Add blocker promotion / wake semantics. +4. Add per-task-kind retry policy table. +5. Add dashboard aggregation + regression tests. -1. repeated identical failures no longer consume blind retries across many siblings -2. shared blockers are promoted to prerequisite tasks when routing rules match -3. retry policy differs for execution, aggregate, and artifact-only tasks -4. incidents and blockers can reference stable fingerprint hashes -5. strategy switches preserve evidence and partial progress instead of restarting from scratch +This order ensures the system can first *see* repeated failure identity, then *route* it, then *enforce* differentiated budgets. -## Recommended implementation order +--- -1. add fingerprint schema + hashing utility -2. emit fingerprints from execution and verification failure paths -3. index active failures by `(hash, class, resource, scope)` -4. add blocker promotion and sibling pause/wake behavior -5. wire retry matrix into task-kind-specific policy evaluation -6. emit incident artifacts and reviewer-facing summaries +## Bottom line -## Open questions +T1712 should change TaskCore from **"every failure increments one retry counter"** to **"each failure class carries a scoped fingerprint and a bounded next strategy."** -- Should fingerprint hashes include capability profile/version so planner changes do not over-dedupe unrelated failures? -- Do we want per-provider routing scopes in addition to environment scope? -- When a blocker is waived manually, should matching leaves resume automatically or require explicit review? +That is the key behavior change needed to stop repeated sibling churn, promote blocker-removal work when appropriate, and make retry policy depend on both **what failed** and **what kind of task failed**. From a4718f9dae07f85add6372df615078a873b7f0db Mon Sep 17 00:00:00 2001 From: krandder Date: Sat, 14 Mar 2026 04:34:34 +0000 Subject: [PATCH 6/8] T1811: implement plan-based decomposition (task plan) - Add plan-parse.ts: markdown plan parser with cost allocation - Wire `task plan` and `task decompose plan` CLI commands - Both parse checklist/ordered/bullet markdown into child tasks - Cost allocation: explicit costs honored, remainder split evenly - Support --file, --stdin, positional text, --strategy sequential|parallel - 32 unit tests for parser and cost allocation (all passing) - Update task-cli-spec.md with plan command documentation - Update prompt guidance to surface plan command in decomposition phase Co-Authored-By: Claude Opus 4.6 --- core/cli/plan-parse.ts | 258 +++++++++++++++++++++++++++++++++ core/cli/task.ts | 269 ++++++++++++++++++++++++++++++++++- core/test/cli-plan.test.ts | 221 ++++++++++++++++++++++++++++ core/test/plan-parse.test.ts | 269 +++++++++++++++++++++++++++++++++++ docs/task-cli-spec.md | 99 +++++++++++++ middle/prompt.ts | 3 +- 6 files changed, 1111 insertions(+), 8 deletions(-) create mode 100644 core/cli/plan-parse.ts create mode 100644 core/test/cli-plan.test.ts create mode 100644 core/test/plan-parse.test.ts diff --git a/core/cli/plan-parse.ts b/core/cli/plan-parse.ts new file mode 100644 index 0000000..1da2167 --- /dev/null +++ b/core/cli/plan-parse.ts @@ -0,0 +1,258 @@ +/** + * plan-parse.ts — parse markdown-ish plans into child task specs and allocate costs. + * + * Supports: + * - Checklist items: `- [ ] title` or `- [x] title` + * - Ordered items: `1. title` + * - Bullet items: `- title` + * - Headings: `# Section` — prefix subsequent items' titles with "Section: " + * - Continuation lines: indented (2+ spaces) lines after an item enrich its description + * - Trailing meta: `(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true)` + */ + +export interface ParsedItem { + title: string; + description: string; + cost: number | undefined; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +export interface AllocatedItem { + title: string; + description: string; + cost: number; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +interface ItemMeta { + cost?: number; + assignee?: string; + reviewer?: string; + skipAnalysis?: boolean; +} + +function extractMeta(raw: string): { baseTitle: string; meta: ItemMeta } { + // Match trailing parenthetical: `title (key: val, key: val)` + const parenMatch = raw.match(/^(.*?)\s*\(([^)]+)\)\s*$/); + if (!parenMatch) return { baseTitle: raw.trim(), meta: {} }; + + const baseTitle = parenMatch[1]!.trim(); + const metaStr = parenMatch[2]!; + const meta: ItemMeta = {}; + + for (const pair of metaStr.split(",")) { + const colonIdx = pair.indexOf(":"); + if (colonIdx < 0) continue; + const key = pair.slice(0, colonIdx).trim().toLowerCase(); + const value = pair.slice(colonIdx + 1).trim(); + + switch (key) { + case "cost": { + const n = Number(value); + if (Number.isFinite(n) && n > 0) meta.cost = n; + break; + } + case "assignee": + if (value) meta.assignee = value; + break; + case "reviewer": + if (value) meta.reviewer = value; + break; + case "skip-analysis": + meta.skipAnalysis = value === "true" || value === "1" || value === "yes"; + break; + } + } + return { baseTitle, meta }; +} + +interface PendingItem { + title: string; + rawTitle: string; // item text without heading prefix, used as default description + cost: number | undefined; + assignee: string | undefined; + reviewer: string | undefined; + skipAnalysis: boolean; +} + +/** + * Parse a markdown-ish plan text into ParsedItem[]. + * + * Item markers: + * - `- [ ] title` / `- [x] title` — checklist + * - `1. title` — ordered list + * - `- title` — plain bullet + * + * Headings (`# …`) set a context prefix applied to all following items until + * the next heading. + * + * Indented lines (2+ spaces) following an item are appended to its description. + * + * A trailing parenthetical `(key: value, …)` on any item line supplies metadata. + */ +export function parsePlan(text: string): ParsedItem[] { + const lines = text.split("\n"); + const items: ParsedItem[] = []; + let headingContext = ""; + let pending: PendingItem | null = null; + const descLines: string[] = []; + + function flush(): void { + if (!pending) return; + const description = descLines.length > 0 ? descLines.join("\n") : pending.rawTitle; + items.push({ + title: pending.title, + description, + cost: pending.cost, + assignee: pending.assignee, + reviewer: pending.reviewer, + skipAnalysis: pending.skipAnalysis, + }); + pending = null; + descLines.length = 0; + } + + for (const line of lines) { + const trimmed = line.trim(); + + // Blank lines — skip (don't flush; a blank between item and continuation is fine) + if (trimmed === "") continue; + + // Heading: reset context prefix + const headingMatch = trimmed.match(/^#{1,6}\s+(.+)$/); + if (headingMatch) { + flush(); + headingContext = headingMatch[1]!.trim(); + continue; + } + + // Checklist: `- [ ] title` or `- [x] title` (any char inside brackets) + const checklistMatch = trimmed.match(/^-\s+\[[^\]]*\]\s+(.+)$/); + // Ordered: `1. title` (must not have already matched checklist) + const orderedMatch = !checklistMatch ? trimmed.match(/^\d+\.\s+(.+)$/) : null; + // Bullet: `- title` (only when neither of the above matched) + const bulletMatch = !checklistMatch && !orderedMatch ? trimmed.match(/^-\s+(.+)$/) : null; + + const itemText = checklistMatch?.[1] ?? orderedMatch?.[1] ?? bulletMatch?.[1]; + + if (itemText !== undefined) { + flush(); + const { baseTitle, meta } = extractMeta(itemText); + const prefix = headingContext ? `${headingContext}: ` : ""; + pending = { + title: prefix + baseTitle, + rawTitle: baseTitle, + cost: meta.cost, + assignee: meta.assignee, + reviewer: meta.reviewer, + skipAnalysis: meta.skipAnalysis ?? false, + }; + continue; + } + + // Continuation: indented line (2+ spaces or a tab) while we have a pending item + if (pending && /^[ \t]{2,}/.test(line)) { + descLines.push(trimmed); + continue; + } + + // Unrecognised line — flush pending (don't silently swallow item boundaries) + flush(); + } + + flush(); + return items; +} + +/** + * Allocate costs to items. Items with an explicit `cost` keep it; items without + * share the remaining budget evenly. Uses integer arithmetic (cents) to avoid + * float drift. + * + * Returns an error string on failure, or the allocated items on success. + */ +export function allocateCosts( + items: ParsedItem[], + budgetRemainingDollars: number, +): { ok: true; items: AllocatedItem[] } | { ok: false; error: string } { + if (items.length === 0) { + return { ok: false, error: "plan contains no items" }; + } + + // Work in integer cents to avoid float drift + const budgetCents = Math.round(budgetRemainingDollars * 100); + if (budgetCents <= 0) { + return { + ok: false, + error: `no positive budget remaining (${budgetRemainingDollars.toFixed(2)})`, + }; + } + + let specifiedCents = 0; + let unspecifiedCount = 0; + for (const item of items) { + if (item.cost !== undefined) { + specifiedCents += Math.round(item.cost * 100); + } else { + unspecifiedCount++; + } + } + + if (specifiedCents > budgetCents) { + return { + ok: false, + error: + `explicit costs (${(specifiedCents / 100).toFixed(2)}) exceed remaining budget` + + ` (${budgetRemainingDollars.toFixed(2)})`, + }; + } + + const remainingCents = budgetCents - specifiedCents; + let perItemCents = 0; + let extraCents = 0; + + if (unspecifiedCount > 0) { + if (remainingCents <= 0) { + return { + ok: false, + error: `no budget remains for ${unspecifiedCount} item(s) without explicit cost`, + }; + } + perItemCents = Math.floor(remainingCents / unspecifiedCount); + extraCents = remainingCents % unspecifiedCount; + if (perItemCents === 0) { + return { + ok: false, + error: + `remaining budget (${(remainingCents / 100).toFixed(2)}) too small to distribute` + + ` across ${unspecifiedCount} uncosted item(s)`, + }; + } + } + + // Distribute extra cents to the first items (deterministic, no float drift) + let extraGiven = 0; + const allocated: AllocatedItem[] = items.map((item) => { + let costCents: number; + if (item.cost !== undefined) { + costCents = Math.round(item.cost * 100); + } else { + costCents = perItemCents + (extraGiven < extraCents ? 1 : 0); + if (extraGiven < extraCents) extraGiven++; + } + return { + title: item.title, + description: item.description, + cost: costCents / 100, + assignee: item.assignee, + reviewer: item.reviewer, + skipAnalysis: item.skipAnalysis, + }; + }); + + return { ok: true, items: allocated }; +} diff --git a/core/cli/task.ts b/core/cli/task.ts index 19192a2..9d71699 100644 --- a/core/cli/task.ts +++ b/core/cli/task.ts @@ -11,6 +11,7 @@ import { filterClaimableTasks, includeReviewQueueTask, } from "./claimability.js"; +import { allocateCosts, parsePlan } from "./plan-parse.js"; // Handle EPIPE errors gracefully (e.g., when piping to head) process.stdout.on("error", (err: NodeJS.ErrnoException) => { @@ -535,7 +536,9 @@ function phaseGuidance(phase: string, condition: string): string[] { case "decomposition.active": return [ "You're decomposing this task. Next steps:", - " task decompose start # begin decomposition", + " task plan --file plan.md # materialize a whole checklist in one shot", + " task decompose plan --stdin # same flow, reading markdown from stdin", + " task decompose start # fallback: build children incrementally", " task decompose add <...> # add a child task", " task decompose commit # finalize children", " task decompose cancel # cancel decomposition", @@ -768,7 +771,8 @@ function printHelp(): void { " task update ", " task analyze", " task decide ", - " task decompose ...", + " task plan | --file | --stdin # plan-based decomposition", + " task decompose ...", " task review ...", " task journal ...", " task worktree", @@ -1520,10 +1524,12 @@ async function cmdClaim(argv: string[], jsonMode: boolean): Promise { process.stdout.write(" task review reject --force — reject permanently (prefer request-changes)\n"); process.stdout.write(" task review request-changes \"notes\" — request changes\n"); } else if (phase === "decomposition") { - process.stdout.write(" task decompose start — begin decomposition\n"); - process.stdout.write(" task decompose add \"...\" — add a child task\n"); - process.stdout.write(" task decompose commit — finalize children\n"); - process.stdout.write(" task decompose cancel — cancel decomposition\n"); + process.stdout.write(" task plan --file plan.md — commit a whole checklist in one shot\n"); + process.stdout.write(" task decompose plan --stdin — same flow, reading markdown from stdin\n"); + process.stdout.write(" task decompose start — fallback: begin incremental decomposition\n"); + process.stdout.write(" task decompose add \"...\" — add a child task\n"); + process.stdout.write(" task decompose commit — finalize children\n"); + process.stdout.write(" task decompose cancel — cancel decomposition\n"); } else { process.stdout.write(" task submit \"what you did\" — when done\n"); process.stdout.write(" task complete \"evidence\" — done, no review needed\n"); @@ -2005,9 +2011,221 @@ async function cmdDecompose(argv: string[], jsonMode: boolean): Promise { return; } + case "plan": { + const { flags } = parseFlags(args); + + // Read plan text from --items, --file, or --stdin + let planText: string; + const itemsFlag = getFlagString(flags, "items"); + const fileFlag = getFlagString(flags, "file"); + const useStdin = getFlagBool(flags, "stdin"); + + if (itemsFlag !== undefined) { + planText = itemsFlag; + } else if (fileFlag !== undefined) { + if (!fs.existsSync(fileFlag)) { + throw new CliError(`File not found: ${fileFlag}`, 1); + } + planText = fs.readFileSync(fileFlag, "utf-8"); + } else if (useStdin) { + planText = fs.readFileSync(0, "utf-8"); + } else { + throw new CliError( + "Usage: task decompose plan --items | --file | --stdin\n" + + " [--strategy sequential|parallel]", + 1, + ); + } + + const strategyRaw = getFlagString(flags, "strategy") ?? "sequential"; + if (strategyRaw !== "sequential" && strategyRaw !== "parallel") { + throw new CliError("--strategy must be 'sequential' or 'parallel'", 1); + } + const strategy = strategyRaw as "sequential" | "parallel"; + + // Parse the plan into items + const parsedItems = parsePlan(planText); + if (parsedItems.length === 0) { + throw new CliError( + "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.", + 1, + ); + } + + // Fetch parent task to determine remaining budget + const task = await getTask(taskId); + const costObj = asRecord(task["cost"]); + const budgetRemaining = costObj + ? (asNumber(costObj["allocated"]) ?? 0) + - (asNumber(costObj["consumed"]) ?? 0) + - (asNumber(costObj["childAllocated"]) ?? 0) + + (asNumber(costObj["childRecovered"]) ?? 0) + : 0; + + // Allocate costs (explicit + auto-distribution) + const allocation = allocateCosts(parsedItems, budgetRemaining); + if (!allocation.ok) { + throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1); + } + + // Build children array for the one-shot endpoint + const children = allocation.items.map((item) => { + const child: Record = { + title: item.title, + description: item.description, + costAllocation: item.cost, + }; + if (item.assignee) child["assignee"] = item.assignee; + if (item.reviewer) child["reviewer"] = item.reviewer; + if (item.skipAnalysis) child["skipAnalysis"] = true; + return child; + }); + + const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, { + children, + coordinationMode: strategy, + }); + + if (jsonMode) { + process.stdout.write(JSON.stringify(response, null, 2) + "\n"); + return; + } + + process.stdout.write("--- Decomposition committed ---\n\n"); + const responseChildren = asArray(response["children"]) + .map((c) => asRecord(c)) + .filter((c): c is Record => c !== null); + process.stdout.write(`Created ${responseChildren.length} children:\n`); + for (const child of responseChildren) { + process.stdout.write( + ` T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")} ${formatMoney(child["costAllocation"])}\n`, + ); + } + + const planAgentId = process.env["TASKCORE_AGENT_ID"]; + if (planAgentId) clearActiveTask(planAgentId); + return; + } + default: - throw new CliError("Usage: task decompose ...", 1); + throw new CliError("Usage: task decompose ...", 1); + } +} + +/** + * `task plan` — top-level shortcut for plan-based decomposition. + * + * Usage: + * task plan "- [ ] step one\n- [ ] step two" + * task plan --file plan.md + * task plan --stdin + * task plan "- step one" --strategy parallel + * + * Equivalent to `task decompose plan` but more natural for agents. + * Accepts plan text as a positional arg (most common), --file, or --stdin. + */ +async function cmdPlan(argv: string[], jsonMode: boolean): Promise { + requireAgentId(); + const taskId = currentTaskId(); + const { positionals, flags } = parseFlags(argv); + + // Read plan text from positional arg, --file, or --stdin + let planText: string; + const fileFlag = getFlagString(flags, "file"); + const useStdin = getFlagBool(flags, "stdin"); + + if (positionals.length > 0) { + planText = positionals.join(" "); + } else if (fileFlag !== undefined) { + if (!fs.existsSync(fileFlag)) { + throw new CliError(`File not found: ${fileFlag}`, 1); + } + planText = fs.readFileSync(fileFlag, "utf-8"); + } else if (useStdin) { + planText = fs.readFileSync(0, "utf-8"); + } else { + throw new CliError( + "Usage: task plan | --file | --stdin\n" + + " [--strategy sequential|parallel]\n" + + "\n" + + "Plan format:\n" + + " - [ ] Step title (cost: 10, assignee: coder)\n" + + " 1. Ordered step\n" + + " - Bullet step\n" + + " # Section heading\n" + + " Indented lines extend item descriptions.", + 1, + ); + } + + const strategyRaw = getFlagString(flags, "strategy") ?? "sequential"; + if (strategyRaw !== "sequential" && strategyRaw !== "parallel") { + throw new CliError("--strategy must be 'sequential' or 'parallel'", 1); + } + const strategy = strategyRaw as "sequential" | "parallel"; + + // Parse the plan + const parsedItems = parsePlan(planText); + if (parsedItems.length === 0) { + throw new CliError( + "No items found in plan. Use checklist (- [ ] step), ordered (1. step), or bullet (- step) format.", + 1, + ); } + + // Fetch task to determine remaining budget + const task = await getTask(taskId); + const costObj = asRecord(task["cost"]); + const budgetRemaining = costObj + ? (asNumber(costObj["allocated"]) ?? 0) + - (asNumber(costObj["consumed"]) ?? 0) + - (asNumber(costObj["childAllocated"]) ?? 0) + + (asNumber(costObj["childRecovered"]) ?? 0) + : 0; + + // Allocate costs + const allocation = allocateCosts(parsedItems, budgetRemaining); + if (!allocation.ok) { + throw new CliError(`Cost allocation failed: ${(allocation as { ok: false; error: string }).error}`, 1); + } + + // Build children array + const children = allocation.items.map((item) => { + const child: Record = { + title: item.title, + description: item.description, + costAllocation: item.cost, + }; + if (item.assignee) child["assignee"] = item.assignee; + if (item.reviewer) child["reviewer"] = item.reviewer; + if (item.skipAnalysis) child["skipAnalysis"] = true; + return child; + }); + + const response = await apiRequest("POST", `/tasks/${taskId}/decompose`, { + children, + coordinationMode: strategy, + }); + + if (jsonMode) { + process.stdout.write(JSON.stringify(response, null, 2) + "\n"); + return; + } + + process.stdout.write(`--- Plan materialized for T${taskId} ---\n\n`); + const responseChildren = asArray(response["children"]) + .map((c) => asRecord(c)) + .filter((c): c is Record => c !== null); + process.stdout.write(`Created ${responseChildren.length} child tasks:\n`); + for (const child of responseChildren) { + process.stdout.write( + ` T${getString(child, "id", "?")}: ${getString(child, "title", "(untitled)")} ${formatMoney(child["costAllocation"])}\n`, + ); + } + process.stdout.write("\nParent task is now a coordinator. Children will execute the plan.\n"); + + const planAgentId = process.env["TASKCORE_AGENT_ID"]; + if (planAgentId) clearActiveTask(planAgentId); } function ensureContextWithTaskId(taskId: string): TaskContext { @@ -2606,6 +2824,27 @@ const subcommandHelp: Record = { " decompose Task will be split into subtasks", ].join("\n"), + plan: [ + "task plan | --file | --stdin", + "", + "One-shot plan-based decomposition. Parses a markdown plan into", + "child tasks and materializes them under the current task.", + "Equivalent to `task decompose plan` but accepts plan as a positional arg.", + "", + "Options:", + " --file Read plan from file", + " --stdin Read plan from stdin", + " --strategy sequential|parallel Coordination mode (default: sequential)", + "", + "Plan format:", + " - [ ] Step title (cost: 10, assignee: coder)", + " 1. Ordered step", + " - Bullet step", + " # Section heading — prefixes following items with 'Section: '", + " Indented lines (2+ spaces) extend the item description.", + " Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis", + ].join("\n"), + decompose: [ "task decompose ...", "", @@ -2620,6 +2859,19 @@ const subcommandHelp: Record = { " --skip-analysis Skip analysis phase for child", " commit Commit the decomposition", " cancel Cancel pending decomposition", + " plan One-shot plan-based decomposition", + " --items Inline plan text", + " --file Read plan from file", + " --stdin Read plan from stdin", + " --strategy sequential|parallel Coordination mode (default: sequential)", + "", + "Plan format (for 'plan' subcommand):", + " - [ ] Step title (cost: 10, assignee: coder)", + " 1. Ordered step", + " - Bullet step", + " # Section heading — prefixes following items with 'Section: '", + " Indented lines (2+ spaces) extend the item description.", + " Trailing (key: val) metadata: cost, assignee, reviewer, skip-analysis", ].join("\n"), review: [ @@ -2799,6 +3051,9 @@ async function run(argv: string[]): Promise { case "decide": await cmdDecide(rest, jsonMode); return; + case "plan": + await cmdPlan(rest, jsonMode); + return; case "decompose": await cmdDecompose(rest, jsonMode); return; diff --git a/core/test/cli-plan.test.ts b/core/test/cli-plan.test.ts new file mode 100644 index 0000000..4d50269 --- /dev/null +++ b/core/test/cli-plan.test.ts @@ -0,0 +1,221 @@ +import * as http from "node:http"; +import { spawnSync } from "node:child_process"; +import { beforeEach, afterEach, test } from "node:test"; +import * as assert from "node:assert/strict"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { OrchestrationCore } from "../index.js"; +import { createHttpServer } from "../../middle/http.js"; +import { loadConfig, type Config } from "../../middle/config.js"; +import { initJournalRepo } from "../../middle/journal.js"; + +let server: http.Server; +let core: OrchestrationCore; +let tmpDir: string; +let port: number; +let config: Config; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../.."); + +function request( + method: string, + urlPath: string, + body?: unknown, +): Promise<{ status: number; body: unknown }> { + return new Promise((resolve, reject) => { + const data = body ? JSON.stringify(body) : undefined; + const req = http.request( + { + hostname: "127.0.0.1", + port, + path: urlPath, + method, + headers: data + ? { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + } + : {}, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (chunk: Buffer) => chunks.push(chunk)); + res.on("end", () => { + const raw = Buffer.concat(chunks).toString("utf-8"); + try { + resolve({ status: res.statusCode ?? 500, body: JSON.parse(raw) }); + } catch { + resolve({ status: res.statusCode ?? 500, body: raw }); + } + }); + }, + ); + req.on("error", reject); + if (data) req.write(data); + req.end(); + }); +} + +async function setup(): Promise { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-cli-plan-")); + const dbPath = path.join(tmpDir, "test.db"); + port = 19800 + Math.floor(Math.random() * 1000); + const journalRepoPath = path.join(tmpDir, "journal"); + const worktreeBaseDir = path.join(tmpDir, "worktrees"); + const workspaceDir = path.join(tmpDir, "workspace"); + const agentRegistry = path.join(tmpDir, "registry.json"); + + fs.mkdirSync(workspaceDir, { recursive: true }); + fs.mkdirSync(path.join(workspaceDir, "data"), { recursive: true }); + fs.writeFileSync(agentRegistry, JSON.stringify({ + agents: [ + { id: "coder", assignable: true, reviewer: true, consulted: true }, + { id: "overseer", assignable: true, reviewer: true, consulted: true }, + ], + }, null, 2)); + initJournalRepo(journalRepoPath); + + core = new OrchestrationCore({ + dbPath, + invariantChecks: true, + snapshotEvery: 50, + }); + + config = { + ...loadConfig(), + port, + dbPath, + agentRegistry, + workspaceDir, + journalRepoPath, + worktreeBaseDir, + runtimeFile: "", + }; + + server = createHttpServer(core, config); + await new Promise((resolve) => { + server.listen(port, "127.0.0.1", resolve); + }); +} + +async function teardown(): Promise { + await new Promise((resolve) => server.close(() => resolve())); + core.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +} + +beforeEach(setup); +afterEach(teardown); + +test("task plan materializes a markdown checklist into child tasks end-to-end", async () => { + const createRes = await request("POST", "/tasks", { + title: "Plan import parent", + description: "Verify task plan end-to-end", + assignee: "coder", + costBudget: 25, + }); + assert.equal(createRes.status, 201); + + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const planPath = path.join(tmpDir, "plan.md"); + fs.writeFileSync(planPath, [ + "# Planning", + "- [ ] Parse markdown checklist (cost: 5, assignee: coder)", + " Capture headings and metadata.", + "- [ ] Materialize children (reviewer: overseer)", + " Verify the parent/child shape in taskcore/dashboard.", + ].join("\n")); + + const cli = spawnSync( + process.execPath, + ["--import", "tsx", "core/cli/task.ts", "plan", "--file", planPath], + { + cwd: repoRoot, + env: { + ...process.env, + ORCHESTRATOR_PORT: String(port), + TASKCORE_AGENT_ID: "coder", + TASK_ID: "1", + }, + encoding: "utf-8", + }, + ); + + assert.equal(cli.status, 0, `stdout:\n${cli.stdout}\n\nstderr:\n${cli.stderr}`); + assert.match(cli.stdout, /Plan materialized for T1/); + assert.match(cli.stdout, /Created 2 child tasks/); + + const parentRes = await request("GET", "/tasks/1"); + assert.equal(parentRes.status, 200); + const parent = (parentRes.body as { + task: { + phase: string; + condition: string; + children: string[]; + coordination?: { mode?: string }; + }; + }).task; + + assert.equal(parent.phase, "analysis"); + assert.equal(parent.condition, "waiting"); + assert.deepEqual(parent.children, ["2", "3"]); + assert.equal(parent.coordination?.mode, "sequential_children"); + + const childrenRes = await request("GET", "/tasks?parentId=1&full=true"); + assert.equal(childrenRes.status, 200); + const children = (childrenRes.body as { + tasks: Array<{ + id: string; + title: string; + description: string; + phase: string; + condition: string; + metadata: { assignee?: string; reviewer?: string }; + cost: { allocated: number }; + }>; + }).tasks; + + assert.equal(children.length, 2); + assert.deepEqual( + children.map((child) => ({ + id: child.id, + title: child.title, + description: child.description, + phase: child.phase, + condition: child.condition, + assignee: child.metadata.assignee, + reviewer: child.metadata.reviewer, + allocated: child.cost.allocated, + })), + [ + { + id: "2", + title: "Planning: Parse markdown checklist", + description: "Capture headings and metadata.", + phase: "analysis", + condition: "ready", + assignee: "coder", + reviewer: undefined, + allocated: 5, + }, + { + id: "3", + title: "Planning: Materialize children", + description: "Verify the parent/child shape in taskcore/dashboard.", + phase: "analysis", + condition: "waiting", + assignee: undefined, + reviewer: "overseer", + allocated: 20, + }, + ], + ); +}); diff --git a/core/test/plan-parse.test.ts b/core/test/plan-parse.test.ts new file mode 100644 index 0000000..e1e4543 --- /dev/null +++ b/core/test/plan-parse.test.ts @@ -0,0 +1,269 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { allocateCosts, parsePlan } from "../cli/plan-parse.js"; + +// --------------------------------------------------------------------------- +// Parser tests +// --------------------------------------------------------------------------- + +test("parsePlan: checklist items", () => { + const items = parsePlan("- [ ] First step\n- [ ] Second step"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "First step"); + assert.equal(items[1]!.title, "Second step"); +}); + +test("parsePlan: checked and unchecked checklist both parse", () => { + const items = parsePlan("- [ ] Pending\n- [x] Done\n- [X] Also done"); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Pending"); + assert.equal(items[1]!.title, "Done"); + assert.equal(items[2]!.title, "Also done"); +}); + +test("parsePlan: ordered list items", () => { + const items = parsePlan("1. Alpha\n2. Beta\n3. Gamma"); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Alpha"); + assert.equal(items[1]!.title, "Beta"); + assert.equal(items[2]!.title, "Gamma"); +}); + +test("parsePlan: plain bullet items", () => { + const items = parsePlan("- Do thing A\n- Do thing B"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "Do thing A"); + assert.equal(items[1]!.title, "Do thing B"); +}); + +test("parsePlan: headings prefix subsequent items", () => { + const items = parsePlan("# Setup\n- Install deps\n# Cleanup\n- Remove temp files"); + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "Setup: Install deps"); + assert.equal(items[1]!.title, "Cleanup: Remove temp files"); +}); + +test("parsePlan: heading context resets on new heading", () => { + const items = parsePlan("# Phase 1\n- Item A\n# Phase 2\n- Item B"); + assert.equal(items[0]!.title, "Phase 1: Item A"); + assert.equal(items[1]!.title, "Phase 2: Item B"); +}); + +test("parsePlan: no heading means no prefix", () => { + const items = parsePlan("- Plain item"); + assert.equal(items[0]!.title, "Plain item"); +}); + +test("parsePlan: indented continuation lines become description", () => { + const items = parsePlan( + "- Do the thing\n With extra context here\n And more details", + ); + assert.equal(items.length, 1); + assert.equal(items[0]!.title, "Do the thing"); + assert.equal(items[0]!.description, "With extra context here\nAnd more details"); +}); + +test("parsePlan: description defaults to item text when no continuations", () => { + const items = parsePlan("- Simple item"); + assert.equal(items[0]!.description, "Simple item"); +}); + +test("parsePlan: blank lines between items are ignored", () => { + const items = parsePlan("- First\n\n- Second\n\n- Third"); + assert.equal(items.length, 3); +}); + +test("parsePlan: unrecognised prose flushes the pending item", () => { + const items = parsePlan([ + "- First step", + "This paragraph should not become a continuation line", + "- Second step", + ].join("\n")); + + assert.equal(items.length, 2); + assert.equal(items[0]!.title, "First step"); + assert.equal(items[0]!.description, "First step"); + assert.equal(items[1]!.title, "Second step"); +}); + +test("parsePlan: inline cost metadata", () => { + const items = parsePlan("- Do something (cost: 15)"); + assert.equal(items[0]!.title, "Do something"); + assert.equal(items[0]!.cost, 15); +}); + +test("parsePlan: inline assignee and reviewer", () => { + const items = parsePlan("- Do something (assignee: coder, reviewer: overseer)"); + assert.equal(items[0]!.assignee, "coder"); + assert.equal(items[0]!.reviewer, "overseer"); +}); + +test("parsePlan: skip-analysis flag true", () => { + const items = parsePlan("- Trivial task (skip-analysis: true)"); + assert.equal(items[0]!.skipAnalysis, true); +}); + +test("parsePlan: skip-analysis flag false", () => { + const items = parsePlan("- Normal task (skip-analysis: false)"); + assert.equal(items[0]!.skipAnalysis, false); +}); + +test("parsePlan: combined metadata", () => { + const items = parsePlan( + "- Full item (cost: 20, assignee: coder, reviewer: lead, skip-analysis: true)", + ); + assert.equal(items[0]!.title, "Full item"); + assert.equal(items[0]!.cost, 20); + assert.equal(items[0]!.assignee, "coder"); + assert.equal(items[0]!.reviewer, "lead"); + assert.equal(items[0]!.skipAnalysis, true); +}); + +test("parsePlan: metadata stripped from title", () => { + const items = parsePlan("- Fix the bug (cost: 5)"); + assert.equal(items[0]!.title, "Fix the bug"); + assert.equal(items[0]!.cost, 5); +}); + +test("parsePlan: heading prefix not applied to description", () => { + const items = parsePlan("# Phase\n- Task title"); + assert.equal(items[0]!.title, "Phase: Task title"); + assert.equal(items[0]!.description, "Task title"); // description has no heading prefix +}); + +test("parsePlan: mixed formats in one plan", () => { + const plan = [ + "# Bootstrap", + "- [ ] Install dependencies (cost: 5)", + "1. Configure environment (cost: 10, assignee: infra)", + "# Feature", + "- Build the thing", + " Detailed instructions here", + ].join("\n"); + + const items = parsePlan(plan); + assert.equal(items.length, 3); + assert.equal(items[0]!.title, "Bootstrap: Install dependencies"); + assert.equal(items[0]!.cost, 5); + assert.equal(items[1]!.title, "Bootstrap: Configure environment"); + assert.equal(items[1]!.cost, 10); + assert.equal(items[1]!.assignee, "infra"); + assert.equal(items[2]!.title, "Feature: Build the thing"); + assert.equal(items[2]!.description, "Detailed instructions here"); +}); + +// --------------------------------------------------------------------------- +// Cost allocation tests +// --------------------------------------------------------------------------- + +test("allocateCosts: errors on empty items", () => { + const result = allocateCosts([], 100); + assert.equal(result.ok, false); +}); + +test("allocateCosts: errors on zero budget", () => { + const items = parsePlan("- Step"); + const result = allocateCosts(items, 0); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no positive budget/); +}); + +test("allocateCosts: errors on negative budget", () => { + const items = parsePlan("- Step"); + const result = allocateCosts(items, -5); + assert.equal(result.ok, false); +}); + +test("allocateCosts: uses explicit costs as-is", () => { + const items = parsePlan("- Step A (cost: 10)\n- Step B (cost: 20)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); + assert.equal(result.items[1]!.cost, 20); +}); + +test("allocateCosts: distributes remaining budget evenly", () => { + const items = parsePlan("- A\n- B\n- C"); + const result = allocateCosts(items, 30); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); + assert.equal(result.items[1]!.cost, 10); + assert.equal(result.items[2]!.cost, 10); +}); + +test("allocateCosts: distributes remainder cents to first items", () => { + // $10.01 across 3 items → 3.34, 3.34, 3.33 (1 extra cent to first two) + const items = parsePlan("- A\n- B\n- C"); + const result = allocateCosts(items, 10.01); + assert.equal(result.ok, true); + if (!result.ok) return; + const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0); + assert.equal(totalCents, 1001); + assert.equal(result.items[0]!.cost, 3.34); + assert.equal(result.items[1]!.cost, 3.34); + assert.equal(result.items[2]!.cost, 3.33); +}); + +test("allocateCosts: mixes explicit and auto-distributed costs", () => { + const items = parsePlan("- A (cost: 10)\n- B\n- C"); + const result = allocateCosts(items, 30); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 10); // explicit + assert.equal(result.items[1]!.cost, 10); // (30 - 10) / 2 + assert.equal(result.items[2]!.cost, 10); +}); + +test("allocateCosts: total equals budget when all auto", () => { + const items = parsePlan("- A\n- B"); + const result = allocateCosts(items, 50); + assert.equal(result.ok, true); + if (!result.ok) return; + const totalCents = result.items.reduce((s, i) => s + Math.round(i.cost * 100), 0); + assert.equal(totalCents, 5000); +}); + +test("allocateCosts: errors if explicit costs exceed budget", () => { + const items = parsePlan("- A (cost: 60)\n- B (cost: 50)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /exceed/); +}); + +test("allocateCosts: errors if no budget left for unspecified items", () => { + const items = parsePlan("- A (cost: 100)\n- B"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, false); + if (result.ok) return; + assert.match(result.error, /no budget/); +}); + +test("allocateCosts: preserves assignee and reviewer on items", () => { + const items = parsePlan("- Task (cost: 10, assignee: coder, reviewer: lead)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.assignee, "coder"); + assert.equal(result.items[0]!.reviewer, "lead"); +}); + +test("allocateCosts: preserves skipAnalysis flag", () => { + const items = parsePlan("- Task (cost: 10, skip-analysis: true)"); + const result = allocateCosts(items, 100); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.skipAnalysis, true); +}); + +test("allocateCosts: single item gets full budget when no cost specified", () => { + const items = parsePlan("- Single task"); + const result = allocateCosts(items, 25); + assert.equal(result.ok, true); + if (!result.ok) return; + assert.equal(result.items[0]!.cost, 25); +}); diff --git a/docs/task-cli-spec.md b/docs/task-cli-spec.md index 661d03a..af2dfb8 100644 --- a/docs/task-cli-spec.md +++ b/docs/task-cli-spec.md @@ -510,6 +510,105 @@ Abort a pending decomposition session without committing. task decompose cancel ``` +#### `task plan` / `task decompose plan` — one-shot plan-based decomposition + +Submit a complete decomposition in one command from a structured plan text. +Uses the same `POST /tasks/:id/decompose` endpoint as a regular commit, so +children appear in the dashboard immediately. + +`task plan` is the low-friction shortcut for agents; `task decompose plan` is +its explicit decomposition-scoped equivalent. + +``` +task plan "" +task plan --file +task plan --stdin + [--strategy sequential|parallel] (default: sequential) + +# Equivalent forms +task decompose plan --items "" +task decompose plan --file +task decompose plan --stdin + [--strategy sequential|parallel] (default: sequential) +``` + +**Plan format:** + +```markdown +# Optional Section Heading +- [ ] Checklist step title (cost: 10, assignee: coder) +- [x] Already-checked step (cost: 5) +1. Ordered list step +- Plain bullet step + Indented lines (2+ spaces) become the child description. + Multiple continuation lines are joined with newlines. +``` + +Supported item types: checklist (`- [ ]`), ordered (`1.`), plain bullet (`-`). + +Headings (`#`, `##`, etc.) set a context prefix applied to all following items +until the next heading: `"Section: Step title"`. + +**Trailing metadata** (optional, at end of item line): + +``` +(cost: 15, assignee: coder, reviewer: overseer, skip-analysis: true) +``` + +Supported keys: `cost`, `assignee`, `reviewer`, `skip-analysis`. + +**Plan editing / propagation semantics (v1):** + +- The imported list order becomes the child task order. With the default + `sequential` strategy, that order also drives which child is ready first. +- Checkbox state in the source markdown is **not** treated as task completion. + Checked and unchecked items both import as child tasks; completion is tracked + by taskcore child state after materialization. +- Before materialization, edit the markdown and re-run `task plan`. +- After materialization, the child tasks become the durable source of truth + shown in taskcore/dashboard. If the remaining plan needs to change, create a + new decomposition version for the remaining work rather than mutating + completed child history in place. + +**Cost allocation:** + +- Items with explicit `cost` keep that value. +- Items without `cost` share the remaining budget evenly (integer-cent + arithmetic — no float drift). +- Errors clearly if explicit costs exceed the parent's remaining budget, or + if there is no budget left for uncosted items. + +**Example:** + +```markdown +# Infrastructure +- [ ] Provision database (cost: 15, assignee: infra) +- [ ] Configure networking (cost: 10, assignee: infra) + +# Application +- [ ] Implement API endpoints (assignee: coder, reviewer: lead) + Build REST endpoints for user CRUD operations. +- [ ] Write integration tests +``` + +``` +task plan --file plan.md --strategy sequential +``` + +**Output:** + +``` +--- Plan materialized for T42 --- + +Created 4 child tasks: + T101: Infrastructure: Provision database 15.00 + T102: Infrastructure: Configure networking 10.00 + T103: Application: Implement API endpoints 12.50 + T104: Application: Write integration tests 12.50 + +Parent task is now a coordinator. Children will execute the plan. +``` + ### 3.6 Review Workflow (guided, multi-step) When a task is in the `review` phase and claimed by a reviewer. diff --git a/middle/prompt.ts b/middle/prompt.ts index 99790ab..32e0c95 100644 --- a/middle/prompt.ts +++ b/middle/prompt.ts @@ -412,7 +412,8 @@ function appendDecompositionInstructions(sections: string[], task: Task, config: sections.push(""); sections.push("## How to Submit Your Decomposition"); sections.push(""); - sections.push("Use the incremental decompose CLI — it guides you step by step:"); + sections.push("Preferred when you already have a checklist: use the one-shot plan flow (`task plan --file plan.md` or `task decompose plan --stdin`)."); + sections.push("If you need to build children incrementally, use the step-by-step decompose flow below:"); sections.push(""); sections.push("```bash"); sections.push(`# Step 1: Start a decomposition session`); From eec171cbaf80dc1e2e880ce167df9a4b654fa73e Mon Sep 17 00:00:00 2001 From: krandder Date: Wed, 18 Mar 2026 10:35:30 +0000 Subject: [PATCH 7/8] T2564: fix claim worktree branch fallback --- middle/journal.ts | 1 + middle/test/http.test.ts | 46 ++++++++++++++++++++++++++ middle/test/worktree.test.ts | 63 ++++++++++++++++++++++++++++++++++++ middle/worktree.ts | 38 +++++++++++++++++++--- 4 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 middle/test/worktree.test.ts diff --git a/middle/journal.ts b/middle/journal.ts index bb77768..d840023 100644 --- a/middle/journal.ts +++ b/middle/journal.ts @@ -255,6 +255,7 @@ function git(cwd: string, args: string[]): string { cwd, encoding: "utf-8", timeout: 10_000, + stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, GIT_TERMINAL_PROMPT: "0", diff --git a/middle/test/http.test.ts b/middle/test/http.test.ts index f73e336..f9e3ae9 100644 --- a/middle/test/http.test.ts +++ b/middle/test/http.test.ts @@ -1,3 +1,4 @@ +import { execFileSync } from "node:child_process"; import * as http from "node:http"; import { test, describe, beforeEach, afterEach } from "node:test"; import * as assert from "node:assert/strict"; @@ -110,6 +111,16 @@ async function teardown(): Promise { } } +function initRepo(repoPath: string): void { + fs.mkdirSync(repoPath, { recursive: true }); + execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" }); + fs.writeFileSync(path.join(repoPath, "README.md"), "# test\n", "utf-8"); + execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" }); + execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" }); +} + // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- @@ -462,6 +473,41 @@ describe("HTTP API", () => { assert.ok(readBody.content.includes("Starting implementation")); }); + test("claim falls back to repo HEAD when requested base branch is missing", async () => { + const repoPath = path.join(tmpDir, "code-repo"); + initRepo(repoPath); + + await request("POST", "/tasks", { + title: "Claim fallback test", + description: "Claim should still create a code worktree when base branch is missing", + assignee: "coder", + repo: repoPath, + baseBranch: "missing-base", + skipAnalysis: true, + }); + + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const claimBody = claimRes.body as { + workspace?: { codeWorktree?: string | null }; + warnings?: string[]; + }; + assert.ok(claimBody.workspace?.codeWorktree); + assert.deepEqual(claimBody.warnings ?? [], []); + + const codeWorktree = claimBody.workspace?.codeWorktree!; + assert.equal(fs.existsSync(codeWorktree), true); + assert.equal( + execFileSync("git", ["branch", "--show-current"], { cwd: codeWorktree, encoding: "utf-8" }).trim(), + "task/T1", + ); + assert.equal(fs.existsSync(path.join(codeWorktree, "README.md")), true); + }); + test("status done completes execution task directly when no reviewer is configured", async () => { await request("POST", "/tasks", { title: "Direct completion", diff --git a/middle/test/worktree.test.ts b/middle/test/worktree.test.ts new file mode 100644 index 0000000..dfa7b0a --- /dev/null +++ b/middle/test/worktree.test.ts @@ -0,0 +1,63 @@ +import { spawnSync } from "node:child_process"; +import * as assert from "node:assert/strict"; +import * as path from "node:path"; +import { test } from "node:test"; +import { fileURLToPath, pathToFileURL } from "node:url"; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../.."); +const journalModuleUrl = pathToFileURL(path.join(repoRoot, "middle/journal.ts")).href; +const worktreeModuleUrl = pathToFileURL(path.join(repoRoot, "middle/worktree.ts")).href; + +test("task workspace bootstrap stays silent on stderr for fresh branches", () => { + const script = [ + 'import * as fs from "node:fs";', + 'import * as os from "node:os";', + 'import * as path from "node:path";', + `import { initJournalRepo, createTaskBranch, taskBranch } from ${JSON.stringify(journalModuleUrl)};`, + `import { createWorktree } from ${JSON.stringify(worktreeModuleUrl)};`, + 'const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-worktree-noise-"));', + 'const repoPath = path.join(tmpDir, "journal");', + 'initJournalRepo(repoPath);', + 'createTaskBranch(repoPath, "1");', + 'createWorktree(repoPath, path.join(tmpDir, "journal-T1"), taskBranch("1"));', + ].join("\n"); + + const result = spawnSync(process.execPath, ["--import", "tsx", "-e", script], { + cwd: repoRoot, + encoding: "utf-8", + }); + + assert.equal(result.status, 0, result.stderr || result.stdout); + assert.equal(result.stderr.trim(), ""); +}); + +test("createWorktree falls back to repo HEAD when start point is missing", () => { + const script = [ + 'import { execFileSync } from "node:child_process";', + 'import * as fs from "node:fs";', + 'import * as os from "node:os";', + 'import * as path from "node:path";', + `import { createWorktree } from ${JSON.stringify(worktreeModuleUrl)};`, + 'const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "taskcore-worktree-fallback-"));', + 'const repoPath = path.join(tmpDir, "repo");', + 'fs.mkdirSync(repoPath, { recursive: true });', + 'execFileSync("git", ["init", "--initial-branch=main"], { cwd: repoPath, stdio: "ignore" });', + 'execFileSync("git", ["config", "user.name", "Taskcore Tests"], { cwd: repoPath, stdio: "ignore" });', + 'execFileSync("git", ["config", "user.email", "taskcore-tests@example.com"], { cwd: repoPath, stdio: "ignore" });', + 'fs.writeFileSync(path.join(repoPath, "README.md"), "# test\\n", "utf-8");', + 'execFileSync("git", ["add", "README.md"], { cwd: repoPath, stdio: "ignore" });', + 'execFileSync("git", ["commit", "-m", "init"], { cwd: repoPath, stdio: "ignore" });', + 'const worktreePath = path.join(tmpDir, "code-T1");', + 'createWorktree(repoPath, worktreePath, "task/T1", "missing-base");', + 'const branch = execFileSync("git", ["branch", "--show-current"], { cwd: worktreePath, encoding: "utf-8" }).trim();', + 'if (branch !== "task/T1") throw new Error(`unexpected branch ${branch}`);', + 'if (!fs.existsSync(path.join(worktreePath, "README.md"))) throw new Error("README.md missing from fallback worktree");', + ].join("\n"); + + const result = spawnSync(process.execPath, ["--import", "tsx", "-e", script], { + cwd: repoRoot, + encoding: "utf-8", + }); + + assert.equal(result.status, 0, result.stderr || result.stdout); +}); diff --git a/middle/worktree.ts b/middle/worktree.ts index 2a4d959..a1f11af 100644 --- a/middle/worktree.ts +++ b/middle/worktree.ts @@ -23,13 +23,20 @@ export function createWorktree( // the worktree doesn't have the keys yet. const useGitCrypt = fs.existsSync(path.join(repoPath, ".git", "git-crypt")); const noCheckout = useGitCrypt ? "--no-checkout" : null; + const resolvedStartPoint = startPoint && refExists(repoPath, startPoint) + ? startPoint + : undefined; + const createBranch = !branchExists(repoPath, branch); const addArgs = (withNewBranch: boolean): string[] => { const args = ["worktree", "add"]; if (noCheckout) args.push(noCheckout); + if (withNewBranch) { + args.push("-b", branch); + } args.push(worktreePath); - if (withNewBranch && startPoint) { - args.push("-b", branch, startPoint); + if (withNewBranch) { + if (resolvedStartPoint) args.push(resolvedStartPoint); } else { args.push(branch); } @@ -38,11 +45,11 @@ export function createWorktree( const tryAdd = (): void => { try { - git(repoPath, addArgs(!!startPoint)); + git(repoPath, addArgs(createBranch)); } catch (err) { const msg = String(err); // Branch may already exist — retry without -b - if (startPoint && msg.includes("already exists")) { + if (createBranch && msg.includes("already exists")) { git(repoPath, addArgs(false)); } else { throw err; @@ -205,12 +212,35 @@ export function cleanupStaleWorktrees( // Helpers // --------------------------------------------------------------------------- +function branchExists(repoPath: string, branch: string): boolean { + return refExists(repoPath, `refs/heads/${branch}`); +} + +function refExists(repoPath: string, ref: string): boolean { + try { + execFileSync("git", ["rev-parse", "--verify", "--quiet", `${ref}^{commit}`], { + cwd: repoPath, + encoding: "utf-8", + timeout: 30_000, + stdio: ["ignore", "pipe", "pipe"], + env: { + ...process.env, + GIT_TERMINAL_PROMPT: "0", + }, + }); + return true; + } catch { + return false; + } +} + /** Run a git command. */ function git(cwd: string, args: string[]): string { return execFileSync("git", args, { cwd, encoding: "utf-8", timeout: 30_000, + stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, GIT_TERMINAL_PROMPT: "0", From 7e363c6729ebccb7b3c040e4147355d929dae07d Mon Sep 17 00:00:00 2001 From: "kas.eth" Date: Wed, 18 Mar 2026 12:20:28 -0300 Subject: [PATCH 8/8] Fix leaked active leases and restore lease summary fields (#71) Co-authored-by: codex.41 [OpenClaw] --- core/clock.ts | 31 +++++++++-- core/test/scenarios.test.ts | 43 +++++++++++++++ core/test/validator.test.ts | 28 ++++++++++ core/validator.ts | 13 +++++ middle/http.ts | 24 +++++--- middle/test/http.test.ts | 44 +++++++++++++++ scripts/recover-leaked-leases.ts | 94 ++++++++++++++++++++++++++++++++ 7 files changed, 265 insertions(+), 12 deletions(-) create mode 100644 scripts/recover-leaked-leases.ts diff --git a/core/clock.ts b/core/clock.ts index f77ff86..4809ff5 100644 --- a/core/clock.ts +++ b/core/clock.ts @@ -8,6 +8,7 @@ import { type Event, type LeaseExpired, type PhaseTransition, + type RetryScheduled, type SystemState, type Task, type TaskExhausted, @@ -18,6 +19,14 @@ function backoffDue(task: Task, now: number): boolean { return task.condition === "retryWait" && task.retryAfter !== null && task.retryAfter <= now; } +function hasActiveAgent(task: Task): boolean { + return typeof task.leasedTo === "string" && task.leasedTo.trim().length > 0; +} + +function malformedActiveLease(task: Task): boolean { + return task.condition === "active" && (!hasActiveAgent(task) || task.leaseExpiresAt === null); +} + function childrenCompleteReady(state: SystemState, task: Task): { allTerminal: boolean; anyDone: boolean } { if (task.children.length === 0) { return { allTerminal: false, anyDone: false }; @@ -39,6 +48,7 @@ function childrenCompleteReady(state: SystemState, task: Task): { allTerminal: b export class CoreClock { private readonly sourceId: string; + private static readonly LEAKED_LEASE_BACKOFF_MS = 1_000; public constructor(sourceId = "core-clock") { this.sourceId = sourceId; @@ -103,11 +113,22 @@ export class CoreClock { } } - if ( - task.condition === "active" && - task.leaseExpiresAt !== null && - task.leaseExpiresAt <= now - ) { + if (malformedActiveLease(task)) { + const retryScheduled: RetryScheduled = { + type: "RetryScheduled", + taskId: task.id, + ts: now, + fenceToken: task.currentFenceToken, + reason: "orphaned_on_restart", + retryAfter: now + CoreClock.LEAKED_LEASE_BACKOFF_MS, + phase: task.phase, + attemptNumber: Math.max(1, task.attempts[task.phase].used), + }; + due.push(retryScheduled); + continue; + } + + if (task.condition === "active" && task.leaseExpiresAt !== null && task.leaseExpiresAt <= now) { const leaseExpired: LeaseExpired = { type: "LeaseExpired", taskId: task.id, diff --git a/core/test/scenarios.test.ts b/core/test/scenarios.test.ts index 1ac82a1..a585171 100644 --- a/core/test/scenarios.test.ts +++ b/core/test/scenarios.test.ts @@ -1405,6 +1405,49 @@ test("Scenario S: LeaseExpired auto-emission transitions leased task to retryWai assert.equal(state.tasks.T1800?.leasedTo, null); }); +test("Scenario S2: malformed active task with missing lease is retried by the clock", () => { + let state = createInitialState(); + state = mustReduce(state, createTask("T1801", 1)); + state = mustReduce(state, lease("T1801", 2, 1, "analysis", "analyst", "a-1801")); + + state.tasks.T1801!.leaseExpiresAt = null; + + const clock = new CoreClock(); + const due = clock.collectDueEvents(state, 10_000); + const retryScheduled = due.find((event) => event.type === "RetryScheduled" && event.taskId === "T1801"); + assert.ok(retryScheduled); + if (!retryScheduled || retryScheduled.type !== "RetryScheduled") { + assert.fail("expected RetryScheduled"); + } + assert.equal(retryScheduled.reason, "orphaned_on_restart"); + + state = mustReduce(state, retryScheduled); + assert.equal(state.tasks.T1801?.condition, "retryWait"); + assert.equal(state.tasks.T1801?.leasedTo, null); + assert.equal(state.tasks.T1801?.leaseExpiresAt, null); +}); + +test("Scenario S3: malformed active task with empty agent is retried by the clock", () => { + let state = createInitialState(); + state = mustReduce(state, createTask("T1802", 1)); + state = mustReduce(state, lease("T1802", 2, 1, "analysis", "analyst", "a-1802")); + + state.tasks.T1802!.leasedTo = ""; + + const clock = new CoreClock(); + const due = clock.collectDueEvents(state, 10_000); + const retryScheduled = due.find((event) => event.type === "RetryScheduled" && event.taskId === "T1802"); + assert.ok(retryScheduled); + if (!retryScheduled || retryScheduled.type !== "RetryScheduled") { + assert.fail("expected RetryScheduled"); + } + assert.equal(retryScheduled.reason, "orphaned_on_restart"); + + state = mustReduce(state, retryScheduled); + assert.equal(state.tasks.T1802?.condition, "retryWait"); + assert.equal(state.tasks.T1802?.leasedTo, null); +}); + test("Scenario T: WaitResolved(block) blocks task and propagates summary to parent", () => { let state = createInitialState(); state = mustReduce(state, createTask("T1900", 1)); diff --git a/core/test/validator.test.ts b/core/test/validator.test.ts index eb27d25..4638cc9 100644 --- a/core/test/validator.test.ts +++ b/core/test/validator.test.ts @@ -63,6 +63,34 @@ test("validator rejects non-monotonic fence token", () => { assert.equal(error.code, "fence_not_monotonic"); }); +test("validator rejects LeaseGranted with empty agent id", () => { + const state = bootstrapState(); + + const invalidLease: Event = { + type: "LeaseGranted", + taskId: "T10", + ts: 2, + fenceToken: 1, + agentId: " ", + phase: "analysis", + leaseTimeout: 60_000, + sessionId: "sess-1", + sessionType: "fresh", + contextBudget: 512, + agentContext: { + sessionId: "sess-1", + agentId: " ", + memoryRef: null, + contextTokens: null, + modelId: "test", + }, + }; + + const error = validateEvent(state, invalidLease); + assert.ok(error); + assert.equal(error.code, "invalid_agent_id"); +}); + test("validator enforces failure summary on TaskFailed", () => { const state = bootstrapState(); diff --git a/core/validator.ts b/core/validator.ts index 5c6d4fd..eb7ec6f 100644 --- a/core/validator.ts +++ b/core/validator.ts @@ -321,6 +321,14 @@ function validateSessionPolicy( return null; } +function validateLeaseAgent(event: Extract): ValidationError | null { + if (!nonEmptyText(event.agentId)) { + return mkError(event, "invalid_agent_id", "LeaseGranted.agentId must be non-empty."); + } + + return null; +} + function validateWaitAction(event: Extract): ValidationError | null { if (event.action === "block" && !validFailureSummary(event.summary)) { return mkError( @@ -493,6 +501,11 @@ export function validateEvent(state: SystemState, event: Event): ValidationError return mkError(event, "invalid_context_budget", "Context budget must be a positive integer."); } + const agentError = validateLeaseAgent(event); + if (agentError) { + return agentError; + } + const attempt = task.attempts[event.phase]; if (attempt.used >= attempt.max) { return mkError(event, "attempt_budget_exhausted", "Attempt budget exhausted for current phase.", { diff --git a/middle/http.ts b/middle/http.ts index 2278a5f..6c471a3 100644 --- a/middle/http.ts +++ b/middle/http.ts @@ -712,11 +712,15 @@ function handleClaimTask( return { status: 409, body: { error: "terminal_task", message: `Task ${taskId} is terminal` } }; } - const agentId = typeof b.agentId === "string" && b.agentId.trim().length > 0 - ? b.agentId.trim() - : typeof b.agent === "string" && b.agent.trim().length > 0 - ? b.agent.trim() - : "unknown"; + const requestedAgentId = typeof b.agentId === "string" + ? b.agentId + : typeof b.agent === "string" + ? b.agent + : null; + if (requestedAgentId !== null && requestedAgentId.trim().length === 0) { + return { status: 400, body: { error: "invalid_agent_id", message: "agentId must be non-empty." } }; + } + const agentId = requestedAgentId?.trim() || "unknown"; const fenceToken = task.currentFenceToken + 1; const sessionId = crypto.randomUUID(); @@ -971,8 +975,11 @@ function handleListTasks( assignee: t.metadata["assignee"] ?? null, reviewer: t.metadata["reviewer"] ?? null, priority: t.metadata["priority"] ?? "medium", + activeAgent: t.leasedTo, + leaseExpiresAt: t.leaseExpiresAt, createdAt: t.createdAt, updatedAt: t.updatedAt, + updatedAtMs: t.updatedAt, })); return { status: 200, body: { tasks: summaries } }; @@ -2900,8 +2907,11 @@ function collectAttentionTasks(core: Core): { (t) => !t.terminal && t.condition === "active" && - t.leaseExpiresAt !== null && - t.leaseExpiresAt <= now, + ( + t.leaseExpiresAt === null || + (typeof t.leasedTo !== "string" || t.leasedTo.trim().length === 0) || + t.leaseExpiresAt <= now + ), ) .map(toSummary); diff --git a/middle/test/http.test.ts b/middle/test/http.test.ts index f9e3ae9..af87f5b 100644 --- a/middle/test/http.test.ts +++ b/middle/test/http.test.ts @@ -220,6 +220,35 @@ describe("HTTP API", () => { assert.equal(body.tasks.length, 2); }); + test("GET /tasks summaries include active agent and lease metadata", async () => { + await request("POST", "/tasks", { + title: "Leased task", + description: "Should expose lease info in summary", + assignee: "coder", + }); + const claimRes = await request("POST", "/tasks/1/claim", { + agentId: "coder", + source: "test", + }); + assert.equal(claimRes.status, 200); + + const res = await request("GET", "/tasks"); + assert.equal(res.status, 200); + const body = res.body as { + tasks: Array<{ + id: string; + activeAgent: string | null; + leaseExpiresAt: number | null; + updatedAtMs: number | null; + }>; + }; + const task = body.tasks.find((entry) => entry.id === "1"); + assert.ok(task); + assert.equal(task.activeAgent, "coder"); + assert.equal(typeof task.leaseExpiresAt, "number"); + assert.equal(task.updatedAtMs !== null && task.updatedAtMs > 0, true); + }); + test("GET /dispatchable lists dispatchable tasks", async () => { await request("POST", "/tasks", { title: "Ready task", @@ -243,6 +272,21 @@ describe("HTTP API", () => { assert.equal(res.status, 404); }); + test("POST /tasks/:id/claim rejects blank agent id", async () => { + await request("POST", "/tasks", { + title: "Claim validation", + description: "Blank claim agent should fail", + }); + + const res = await request("POST", "/tasks/1/claim", { + agentId: " ", + source: "test", + }); + assert.equal(res.status, 400); + const body = res.body as { error: string }; + assert.equal(body.error, "invalid_agent_id"); + }); + test("PATCH /tasks/:id/metadata updates priority", async () => { await request("POST", "/tasks", { title: "Metadata test", diff --git a/scripts/recover-leaked-leases.ts b/scripts/recover-leaked-leases.ts new file mode 100644 index 0000000..781f918 --- /dev/null +++ b/scripts/recover-leaked-leases.ts @@ -0,0 +1,94 @@ +type Phase = "analysis" | "decomposition" | "execution" | "review"; + +type AttemptBudget = { used: number; max: number }; + +type TaskRecord = { + id: string; + title: string; + phase: Phase | null; + condition: string | null; + terminal: string | null; + leasedTo: string | null; + leaseExpiresAt: number | null; + currentFenceToken: number; + attempts: Record; +}; + +type TasksResponse = { tasks: TaskRecord[] }; + +const apiBase = process.env["TASKCORE_BASE_URL"] ?? "http://127.0.0.1:18800"; +const dryRun = process.argv.includes("--dry-run"); +const retryDelayMs = 1_000; + +function nonEmptyText(value: string | null | undefined): value is string { + return typeof value === "string" && value.trim().length > 0; +} + +function leakedLeaseReason(task: TaskRecord, now: number): "lease_expired" | "orphaned_on_restart" { + if (task.leaseExpiresAt !== null && task.leaseExpiresAt <= now) { + return "lease_expired"; + } + return "orphaned_on_restart"; +} + +function attemptNumber(task: TaskRecord, phase: Phase): number { + return Math.max(1, task.attempts[phase]?.used ?? 0); +} + +async function main(): Promise { + const now = Date.now(); + const response = await fetch(`${apiBase}/tasks?full=true`); + if (!response.ok) { + throw new Error(`Failed to fetch tasks: ${response.status} ${response.statusText}`); + } + + const body = await response.json() as TasksResponse; + const leaked = body.tasks.filter((task) => + task.terminal === null && + task.phase !== null && + task.condition === "active" && + (!nonEmptyText(task.leasedTo) || task.leaseExpiresAt === null || task.leaseExpiresAt <= now) + ); + + if (leaked.length === 0) { + process.stdout.write("No leaked active leases found.\n"); + return; + } + + for (const task of leaked) { + const reason = leakedLeaseReason(task, now); + const payload = { + type: "RetryScheduled", + taskId: task.id, + ts: Date.now(), + fenceToken: task.currentFenceToken, + reason, + retryAfter: Date.now() + retryDelayMs, + phase: task.phase, + attemptNumber: attemptNumber(task, task.phase), + }; + + if (dryRun) { + process.stdout.write(`[dry-run] would recover T${task.id} (${task.title}) with ${reason}\n`); + continue; + } + + const recoverResponse = await fetch(`${apiBase}/tasks/${task.id}/events`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + if (!recoverResponse.ok) { + const errorBody = await recoverResponse.text(); + throw new Error(`Failed to recover T${task.id}: ${recoverResponse.status} ${errorBody}`); + } + + process.stdout.write(`Recovered T${task.id} (${task.title}) with ${reason}\n`); + } +} + +void main().catch((error) => { + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`${message}\n`); + process.exitCode = 1; +});