From 5b781ea3823779947a704157809f3f857c4f4d8b Mon Sep 17 00:00:00 2001 From: Liao Shiwu Date: Tue, 2 Jun 2026 16:16:43 +0800 Subject: [PATCH 1/3] feat(openclaw): pass retain context guidance to prevent routing metadata misattribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hindsight's fact extraction LLM was misinterpreting routing identifiers (sender open_id, bank ID, channel, provider) as semantic actors, project names, or organizations. After many conversation turns, the bank name (e.g. saber-prod) would override the actual project being discussed (e.g. x-power-cli). This adds interpretation guidance via the retain API 'context' field: - New DEFAULT_RETAIN_CONTEXT constant explains that [context] block sender/channel/provider are routing identifiers, not human names - Bank IDs, session keys, agent IDs, thread IDs, and tags are also marked as operational routing identifiers, not project names - Assistant-role first-person statements are attributed to the AI - Context is passed through the full chain: buildRetainRequest → scopeClient.retain → Hindsight SDK API - RetainQueue persists and flushes context correctly - Backfill CLI also passes context - New 'retainContext' config option allows customization includeSenderContext behavior is unchanged; the [context] block remains in transcript content, but the extraction LLM now knows how to interpret it. 7 files changed, 195 insertions(+), 22 deletions(-). 
--- hindsight-integrations/openclaw/README.md | 3 + .../openclaw/openclaw.plugin.json | 128 +++++++++++++++--- .../openclaw/src/backfill.ts | 3 +- .../openclaw/src/index.test.ts | 55 ++++++++ hindsight-integrations/openclaw/src/index.ts | 23 ++++ .../openclaw/src/retain-queue.ts | 3 + hindsight-integrations/openclaw/src/types.ts | 2 + 7 files changed, 195 insertions(+), 22 deletions(-) diff --git a/hindsight-integrations/openclaw/README.md b/hindsight-integrations/openclaw/README.md index f2d573789..9e68b7fe0 100644 --- a/hindsight-integrations/openclaw/README.md +++ b/hindsight-integrations/openclaw/README.md @@ -95,6 +95,7 @@ Optional settings in `~/.openclaw/openclaw.json` under `plugins.entries.hindsigh | `bankIdPrefix` | — | Prefix for bank IDs (e.g. `"prod"`) | | `retainTags` | `[]` | Tags applied to every retained document, useful for cross-agent/source labeling (e.g. `source_system:openclaw`, `agent:agentname`). Auto-retain also merges inline per-message tags from `...` or `...` blocks in user messages. | | `retainSource` | `"openclaw"` | `source` value written into retained document metadata | +| `retainContext` | built-in OpenClaw guidance | Interpretation guidance sent through the Hindsight retain API `context` field. The default tells the extraction LLM that `[context]` sender/channel/provider values, bank IDs, session keys, source systems, and tags are operational routing metadata, not human names, project names, or organizations. | | `dynamicBankGranularity` | `["agent", "channel", "user"]` | Fields used to derive bank ID. Options: `agent`, `channel`, `user`, `provider` | | `excludeProviders` | `["heartbeat"]` | Message providers to skip for recall/retain (e.g. `heartbeat`, `slack`, `telegram`, `discord`) | | `autoRecall` | `true` | Auto-inject memories before each turn. Set to `false` when the agent has its own recall tool. | @@ -162,6 +163,8 @@ Glob syntax: Retained documents use stable session-scoped IDs derived from the OpenClaw `sessionKey`. 
By default (`retainDocumentScope: 'session'`) every retain in a session shares one document id like `openclaw:agent:agentname:discord:channel:123`, so all turns of the conversation accumulate under a single Hindsight document. Set `retainDocumentScope: 'turn'` to fall back to the per-retain ids (`...:turn:000001`, `...:window:000002` for chunked retention). Either way, retained documents include richer metadata such as `session_key`, `agent_id`, `provider`, `channel_id`, `thread_id`, `sender_id`, `turn_index`, and `retention_scope`. Each message in the retained JSON also carries a structured `timestamp` field (ISO 8601) lifted from OpenClaw's per-message time, so facts are not polluted by inline weekday/date prefixes. +`retainContext` is sent separately from the transcript content and gives Hindsight's extraction LLM interpretation guidance for the retained document. The default is designed for OpenClaw transcripts: it explains that the optional `[context]` block is routing metadata, that assistant-role first-person statements belong to the AI assistant, and that bank IDs or tags should not be treated as the discussed project. This does not remove or change `includeSenderContext`; set `includeSenderContext: false` only if you want to omit the transcript's `[context]` block itself. 
+ ## Documentation For full documentation, configuration options, troubleshooting, and development guide, see: diff --git a/hindsight-integrations/openclaw/openclaw.plugin.json b/hindsight-integrations/openclaw/openclaw.plugin.json index 7a67369c4..be38d22db 100644 --- a/hindsight-integrations/openclaw/openclaw.plugin.json +++ b/hindsight-integrations/openclaw/openclaw.plugin.json @@ -5,8 +5,14 @@ "configContracts": { "secretInputs": { "paths": [ - { "path": "llmApiKey", "expected": "string" }, - { "path": "hindsightApiToken", "expected": "string" } + { + "path": "llmApiKey", + "expected": "string" + }, + { + "path": "hindsightApiToken", + "expected": "string" + } ] } }, @@ -56,14 +62,25 @@ "llmProvider": { "type": "string", "description": "LLM provider for Hindsight memory (e.g. 'openai', 'anthropic', 'gemini', 'groq', 'ollama', 'openai-codex', 'claude-code').", - "enum": ["openai", "anthropic", "gemini", "groq", "ollama", "openai-codex", "claude-code"] + "enum": [ + "openai", + "anthropic", + "gemini", + "groq", + "ollama", + "openai-codex", + "claude-code" + ] }, "llmModel": { "type": "string", "description": "LLM model to use (e.g. 'gpt-4o-mini', 'claude-3-5-haiku-20241022'). Used with llmProvider." }, "llmApiKey": { - "type": ["string", "object"], + "type": [ + "string", + "object" + ], "description": "API key for the LLM provider used by the Hindsight memory daemon. Set via 'openclaw config set ... --ref-source env --ref-id OPENAI_API_KEY' to reference an env var without storing plaintext." }, "llmBaseUrl": { @@ -84,7 +101,10 @@ "description": "External Hindsight API URL (e.g. 'https://mcp.hindsight.devcraft.team'). When set, skips local daemon and connects directly to this API." }, "hindsightApiToken": { - "type": ["string", "object"], + "type": [ + "string", + "object" + ], "description": "API token for external Hindsight API authentication. Required if the external API has authentication enabled." 
}, "dynamicBankId": { @@ -112,6 +132,11 @@ "description": "Source value written into retained document metadata. Defaults to 'openclaw'.", "default": "openclaw" }, + "retainContext": { + "type": "string", + "description": "Interpretation guidance sent via the Hindsight retain API context field. It tells the extraction LLM that OpenClaw sender/channel/provider values, bank IDs, session keys, source systems, and tags are operational routing metadata, not human names, project names, or organizations.", + "default": "This content is an AI-assistant conversation transcript from OpenClaw. The [context] block at the beginning of each turn contains routing identifiers: 'sender' is an opaque user ID (not a human name), 'channel' is a chat identifier, 'provider' is the messaging platform name (e.g. feishu, slack). These are operational routing metadata, not semantic actors or people. Messages with role 'assistant' are from the AI assistant; first-person statements in assistant messages refer to the AI, not the human user. Messages with role 'user' are from the human user. Bank IDs, session keys, agent IDs, thread IDs, source systems, and tags in metadata are also operational routing identifiers, not human names, project names, or organizations." + }, "autoRecall": { "type": "boolean", "description": "Automatically recall memories on every prompt and inject them as context. Set to false when agent has its own recall tool.", @@ -123,16 +148,27 @@ "type": "string" }, "description": "Message providers to exclude from recall and retain (e.g. ['heartbeat', 'telegram', 'discord'])", - "default": ["heartbeat"] + "default": [ + "heartbeat" + ] }, "dynamicBankGranularity": { "type": "array", "items": { "type": "string", - "enum": ["agent", "channel", "user", "provider"] + "enum": [ + "agent", + "channel", + "user", + "provider" + ] }, "description": "Fields used to derive bank ID. Controls memory isolation granularity. 
Default: ['agent', 'channel', 'user'].", - "default": ["agent", "channel", "user"] + "default": [ + "agent", + "channel", + "user" + ] }, "autoRetain": { "type": "boolean", @@ -143,15 +179,26 @@ "type": "array", "items": { "type": "string", - "enum": ["user", "assistant", "system", "tool"] + "enum": [ + "user", + "assistant", + "system", + "tool" + ] }, "description": "Message roles to include in retained transcript. Default: ['user', 'assistant'].", - "default": ["user", "assistant"] + "default": [ + "user", + "assistant" + ] }, "retainFormat": { "type": "string", "description": "Serialization format for retained conversation content. 'json' (default) emits a structured array of {role, content} messages, matching the Claude Code integration. 'text' emits the legacy '[role: x] ... [x:end]' markers.", - "enum": ["json", "text"], + "enum": [ + "json", + "text" + ], "default": "json" }, "retainToolCalls": { @@ -179,13 +226,20 @@ "retainDocumentScope": { "type": "string", "description": "Granularity of the retained document_id. 'session' (default) groups all retains under a single document per OpenClaw session. 'turn' produces a new document per retain.", - "enum": ["session", "turn"], + "enum": [ + "session", + "turn" + ], "default": "session" }, "recallBudget": { "type": "string", "description": "Recall effort level. Higher budgets use more retrieval strategies for better results but take longer.", - "enum": ["low", "mid", "high"], + "enum": [ + "low", + "mid", + "high" + ], "default": "mid" }, "recallMaxTokens": { @@ -198,19 +252,33 @@ "type": "array", "items": { "type": "string", - "enum": ["world", "experience", "observation"] + "enum": [ + "world", + "experience", + "observation" + ] }, "description": "Memory types to recall. 
Defaults to ['observation'] — surfaces only the consolidated, deduplicated view; raw world/experience facts can drive the same answer multiple times when many memories say the same thing.", - "default": ["observation"] + "default": [ + "observation" + ] }, "recallRoles": { "type": "array", "items": { "type": "string", - "enum": ["user", "assistant", "system", "tool"] + "enum": [ + "user", + "assistant", + "system", + "tool" + ] }, "description": "Roles to include when composing contextual recall query. Default: ['user', 'assistant'].", - "default": ["user", "assistant"] + "default": [ + "user", + "assistant" + ] }, "recallContextTurns": { "type": "integer", @@ -242,7 +310,11 @@ }, "recallInjectionPosition": { "type": "string", - "enum": ["prepend", "append", "user"], + "enum": [ + "prepend", + "append", + "user" + ], "description": "Where to inject recalled memories. 'prepend' = start of system prompt (default), 'append' = end of system prompt (preserves prompt cache), 'user' = before user message.", "default": "prepend" }, @@ -254,7 +326,13 @@ "logLevel": { "type": "string", "description": "Console log verbosity. 'off' = no output, 'error' = errors only, 'warning' = errors + warnings, 'info' = key events + periodic summaries, 'debug' = all details.", - "enum": ["off", "error", "warning", "info", "debug"], + "enum": [ + "off", + "error", + "warning", + "info", + "debug" + ], "default": "info" }, "logSummaryIntervalMs": { @@ -283,12 +361,16 @@ }, "ignoreSessionPatterns": { "type": "array", - "items": { "type": "string" }, + "items": { + "type": "string" + }, "description": "Session key glob patterns to skip entirely (no recall, no retain). E.g. [\"agent:main:**\", \"agent:*:cron:**\"]. * matches non-colon chars, ** matches anything." 
}, "statelessSessionPatterns": { "type": "array", - "items": { "type": "string" }, + "items": { + "type": "string" + }, "description": "Session key glob patterns for read-only sessions: retain is always skipped, recall is skipped when skipStatelessSessions is true. E.g. [\"agent:*:subagent:**\", \"agent:*:heartbeat:**\"]." }, "skipStatelessSessions": { @@ -383,6 +465,10 @@ "label": "Retain Source", "placeholder": "openclaw" }, + "retainContext": { + "label": "Retain Context", + "placeholder": "Guidance for interpreting retained OpenClaw transcripts" + }, "autoRecall": { "label": "Auto-Recall", "placeholder": "true (inject memories on every prompt)" diff --git a/hindsight-integrations/openclaw/src/backfill.ts b/hindsight-integrations/openclaw/src/backfill.ts index eea034076..8befd03a6 100644 --- a/hindsight-integrations/openclaw/src/backfill.ts +++ b/hindsight-integrations/openclaw/src/backfill.ts @@ -17,7 +17,7 @@ function loadPackageVersion(): string { } const USER_AGENT = `hindsight-openclaw/${loadPackageVersion()}`; -import { detectExternalApi, detectLLMConfig } from "./index.js"; +import { DEFAULT_RETAIN_CONTEXT, detectExternalApi, detectLLMConfig } from "./index.js"; import type { BankStats, PluginConfig } from "./types.js"; import { buildBackfillPlan, @@ -547,6 +547,7 @@ export async function runCli(argv: string[] = process.argv.slice(2)): Promise { expect(request).toEqual({ content: "hello world", documentId: "openclaw:agent:main:main", + context: DEFAULT_RETAIN_CONTEXT, metadata: { retained_at: expect.any(String), message_count: "2", @@ -385,6 +387,38 @@ describe("buildRetainRequest", () => { }); }); + it("includes the default retain context guidance", () => { + const request = buildRetainRequest( + "hello world", + 1, + {}, + {}, + 1700000000000, + { turnIndex: 1 } + ); + + expect(request.context).toBe(DEFAULT_RETAIN_CONTEXT); + }); + + it("describes routing metadata and assistant/user roles in the default retain context", () => { + 
expect(DEFAULT_RETAIN_CONTEXT).toContain("routing identifiers"); + expect(DEFAULT_RETAIN_CONTEXT).toContain("operational routing identifiers"); + expect(DEFAULT_RETAIN_CONTEXT).toContain("AI assistant"); + }); + + it("uses a configured retain context when provided", () => { + const request = buildRetainRequest( + "hello world", + 1, + {}, + { retainContext: "Custom extraction guidance." }, + 1700000000000, + { turnIndex: 1 } + ); + + expect(request.context).toBe("Custom extraction guidance."); + }); + it("falls back to per-turn doc id when appendSupported is false (older API)", () => { const request = buildRetainRequest( "hello world", @@ -1499,3 +1533,24 @@ describe("getPluginConfig — mission semantics (#1270, #1353)", () => { expect(cfg.observationsMission).toBeUndefined(); }); }); + +describe("getPluginConfig — retainContext", () => { + it("defaults retainContext to the built-in OpenClaw transcript guidance", () => { + const cfg = getPluginConfig(makeApi({})); + expect(cfg.retainContext).toBe(DEFAULT_RETAIN_CONTEXT); + }); + + it("passes through an explicit non-empty retainContext", () => { + const cfg = getPluginConfig(makeApi({ retainContext: "Treat IDs as routing metadata." 
})); + expect(cfg.retainContext).toBe("Treat IDs as routing metadata."); + }); + + it("falls back to the default when retainContext is blank or non-string", () => { + expect(getPluginConfig(makeApi({ retainContext: "" })).retainContext).toBe( + DEFAULT_RETAIN_CONTEXT + ); + expect(getPluginConfig(makeApi({ retainContext: 42 })).retainContext).toBe( + DEFAULT_RETAIN_CONTEXT + ); + }); +}); diff --git a/hindsight-integrations/openclaw/src/index.ts b/hindsight-integrations/openclaw/src/index.ts index d46fa15ee..56e6532a3 100644 --- a/hindsight-integrations/openclaw/src/index.ts +++ b/hindsight-integrations/openclaw/src/index.ts @@ -32,6 +32,19 @@ function loadPackageVersion(): string { const USER_AGENT = `hindsight-openclaw/${loadPackageVersion()}`; +export const DEFAULT_RETAIN_CONTEXT = + "This content is an AI-assistant conversation transcript from OpenClaw. " + + "The [context] block at the beginning of each turn contains routing identifiers: " + + "'sender' is an opaque user ID (not a human name), 'channel' is a chat identifier, " + + "'provider' is the messaging platform name (e.g. feishu, slack). " + + "These are operational routing metadata, not semantic actors or people. " + + "Messages with role 'assistant' are from the AI assistant; first-person statements " + + "in assistant messages refer to the AI, not the human user. " + + "Messages with role 'user' are from the human user. " + + "Bank IDs, session keys, agent IDs, thread IDs, source systems, " + + "and tags in metadata are also operational routing identifiers, " + + "not human names, project names, or organizations."; + // Logger adapter that routes the embed wrapper's output through openclaw's // batched structured logger so messages share the same prefix and respect // the configured log level. 
@@ -114,6 +127,7 @@ function scopeClient(c: HindsightClient, bankId: string): BankScopedClient { async retain(req) { await c.retain(bankId, req.content, { documentId: req.documentId, + context: req.context, metadata: toStringMetadata(req.metadata), tags: req.tags, updateMode: req.updateMode, @@ -284,6 +298,7 @@ async function flushRetainQueue(): Promise { try { await client.retain(item.bankId, item.content, { documentId: item.documentId, + context: item.context, metadata: toStringMetadata(item.metadata), tags: item.tags, updateMode: item.updateMode, @@ -1462,6 +1477,10 @@ export function getPluginConfig(api: MoltbotPluginAPI): PluginConfig { typeof config.retainSource === "string" && config.retainSource.trim().length > 0 ? config.retainSource.trim() : undefined, + retainContext: + typeof config.retainContext === "string" && config.retainContext.trim().length > 0 + ? config.retainContext + : DEFAULT_RETAIN_CONTEXT, excludeProviders: Array.isArray(config.excludeProviders) ? Array.from( new Set([ @@ -2715,6 +2734,10 @@ export function buildRetainRequest( return { content: transcript, documentId: documentId, + context: + typeof pluginConfig.retainContext === "string" && pluginConfig.retainContext.trim().length > 0 + ? 
pluginConfig.retainContext + : DEFAULT_RETAIN_CONTEXT, metadata: { retained_at: new Date(now).toISOString(), message_count: String(messageCount), diff --git a/hindsight-integrations/openclaw/src/retain-queue.ts b/hindsight-integrations/openclaw/src/retain-queue.ts index 33fb69444..c6f8eb971 100644 --- a/hindsight-integrations/openclaw/src/retain-queue.ts +++ b/hindsight-integrations/openclaw/src/retain-queue.ts @@ -22,6 +22,7 @@ import { randomBytes } from "crypto"; export interface QueuedRetainPayload { content: string; documentId?: string; + context?: string; metadata?: Record; tags?: string[]; updateMode?: "replace" | "append"; @@ -32,6 +33,7 @@ export interface QueuedRetain { bankId: string; content: string; documentId: string; + context?: string; metadata: Record; tags?: string[]; updateMode?: "replace" | "append"; @@ -63,6 +65,7 @@ export class RetainQueue { bankId, content: request.content, documentId: request.documentId || "conversation", + context: request.context, metadata: metadata || request.metadata || {}, tags: request.tags, updateMode: request.updateMode, diff --git a/hindsight-integrations/openclaw/src/types.ts b/hindsight-integrations/openclaw/src/types.ts index 46b7bc7bf..0e4b8be26 100644 --- a/hindsight-integrations/openclaw/src/types.ts +++ b/hindsight-integrations/openclaw/src/types.ts @@ -96,6 +96,7 @@ export interface PluginConfig { bankIdPrefix?: string; // Prefix for bank IDs (e.g. 'prod' -> 'prod-slack-C123') retainTags?: string[]; // Tags applied to all retained documents after trimming and deduplication; auto-retain merges these with inline per-message retain-tag directives (e.g. ['source_system:openclaw', 'agent:agentname']) retainSource?: string; // Source written into retained document metadata (default: 'openclaw') + retainContext?: string; // Interpretation guidance sent via the retain API context field. Defaults to built-in OpenClaw transcript/routing metadata guidance. 
excludeProviders?: string[]; // Message providers to exclude from recall/retain (e.g. ['telegram', 'discord']) autoRecall?: boolean; // Auto-recall memories on every prompt (default: true). Set to false when agent has its own recall tool. dynamicBankGranularity?: Array<"agent" | "provider" | "channel" | "user">; // Fields for bank ID derivation. Default: ['agent', 'channel', 'user'] @@ -165,6 +166,7 @@ export type { export interface RetainRequest { content: string; documentId?: string; + context?: string; metadata?: Record; tags?: string[]; /** From d91349afe2a2097b1d675442b926e445f5a92d6a Mon Sep 17 00:00:00 2001 From: Liao Shiwu Date: Tue, 2 Jun 2026 16:34:48 +0800 Subject: [PATCH 2/3] fix(openclaw): remove platform-specific examples from DEFAULT_RETAIN_CONTEXT --- hindsight-integrations/openclaw/README.md | 2 +- .../openclaw/openclaw.plugin.json | 103 +++--------------- .../openclaw/src/index.test.ts | 9 +- hindsight-integrations/openclaw/src/index.ts | 2 +- 4 files changed, 21 insertions(+), 95 deletions(-) diff --git a/hindsight-integrations/openclaw/README.md b/hindsight-integrations/openclaw/README.md index 9e68b7fe0..5c1e12d6a 100644 --- a/hindsight-integrations/openclaw/README.md +++ b/hindsight-integrations/openclaw/README.md @@ -95,7 +95,7 @@ Optional settings in `~/.openclaw/openclaw.json` under `plugins.entries.hindsigh | `bankIdPrefix` | — | Prefix for bank IDs (e.g. `"prod"`) | | `retainTags` | `[]` | Tags applied to every retained document, useful for cross-agent/source labeling (e.g. `source_system:openclaw`, `agent:agentname`). Auto-retain also merges inline per-message tags from `...` or `...` blocks in user messages. | | `retainSource` | `"openclaw"` | `source` value written into retained document metadata | -| `retainContext` | built-in OpenClaw guidance | Interpretation guidance sent through the Hindsight retain API `context` field. 
The default tells the extraction LLM that `[context]` sender/channel/provider values, bank IDs, session keys, source systems, and tags are operational routing metadata, not human names, project names, or organizations. | +| `retainContext` | built-in OpenClaw guidance | Interpretation guidance sent through the Hindsight retain API `context` field. The default tells the extraction LLM that `[context]` sender/channel/provider values, bank IDs, session keys, source systems, and tags are operational routing metadata, not human names, project names, or organizations. | | `dynamicBankGranularity` | `["agent", "channel", "user"]` | Fields used to derive bank ID. Options: `agent`, `channel`, `user`, `provider` | | `excludeProviders` | `["heartbeat"]` | Message providers to skip for recall/retain (e.g. `heartbeat`, `slack`, `telegram`, `discord`) | | `autoRecall` | `true` | Auto-inject memories before each turn. Set to `false` when the agent has its own recall tool. | diff --git a/hindsight-integrations/openclaw/openclaw.plugin.json b/hindsight-integrations/openclaw/openclaw.plugin.json index be38d22db..fc8087007 100644 --- a/hindsight-integrations/openclaw/openclaw.plugin.json +++ b/hindsight-integrations/openclaw/openclaw.plugin.json @@ -62,25 +62,14 @@ "llmProvider": { "type": "string", "description": "LLM provider for Hindsight memory (e.g. 'openai', 'anthropic', 'gemini', 'groq', 'ollama', 'openai-codex', 'claude-code').", - "enum": [ - "openai", - "anthropic", - "gemini", - "groq", - "ollama", - "openai-codex", - "claude-code" - ] + "enum": ["openai", "anthropic", "gemini", "groq", "ollama", "openai-codex", "claude-code"] }, "llmModel": { "type": "string", "description": "LLM model to use (e.g. 'gpt-4o-mini', 'claude-3-5-haiku-20241022'). Used with llmProvider." }, "llmApiKey": { - "type": [ - "string", - "object" - ], + "type": ["string", "object"], "description": "API key for the LLM provider used by the Hindsight memory daemon. Set via 'openclaw config set ... 
--ref-source env --ref-id OPENAI_API_KEY' to reference an env var without storing plaintext." }, "llmBaseUrl": { @@ -101,10 +90,7 @@ "description": "External Hindsight API URL (e.g. 'https://mcp.hindsight.devcraft.team'). When set, skips local daemon and connects directly to this API." }, "hindsightApiToken": { - "type": [ - "string", - "object" - ], + "type": ["string", "object"], "description": "API token for external Hindsight API authentication. Required if the external API has authentication enabled." }, "dynamicBankId": { @@ -135,7 +121,7 @@ "retainContext": { "type": "string", "description": "Interpretation guidance sent via the Hindsight retain API context field. It tells the extraction LLM that OpenClaw sender/channel/provider values, bank IDs, session keys, source systems, and tags are operational routing metadata, not human names, project names, or organizations.", - "default": "This content is an AI-assistant conversation transcript from OpenClaw. The [context] block at the beginning of each turn contains routing identifiers: 'sender' is an opaque user ID (not a human name), 'channel' is a chat identifier, 'provider' is the messaging platform name (e.g. feishu, slack). These are operational routing metadata, not semantic actors or people. Messages with role 'assistant' are from the AI assistant; first-person statements in assistant messages refer to the AI, not the human user. Messages with role 'user' are from the human user. Bank IDs, session keys, agent IDs, thread IDs, source systems, and tags in metadata are also operational routing identifiers, not human names, project names, or organizations." + "default": "This content is an AI-assistant conversation transcript from OpenClaw. The [context] block at the beginning of each turn contains routing identifiers: 'sender' is an opaque user ID (not a human name), 'channel' is a chat identifier, 'provider' is the messaging platform name. 
These are operational routing metadata, not semantic actors or people. Messages with role 'assistant' are from the AI assistant; first-person statements in assistant messages refer to the AI, not the human user. Messages with role 'user' are from the human user. Bank IDs, session keys, agent IDs, thread IDs, source systems, and tags in metadata are also operational routing identifiers, not human names, project names, or organizations." }, "autoRecall": { "type": "boolean", @@ -148,27 +134,16 @@ "type": "string" }, "description": "Message providers to exclude from recall and retain (e.g. ['heartbeat', 'telegram', 'discord'])", - "default": [ - "heartbeat" - ] + "default": ["heartbeat"] }, "dynamicBankGranularity": { "type": "array", "items": { "type": "string", - "enum": [ - "agent", - "channel", - "user", - "provider" - ] + "enum": ["agent", "channel", "user", "provider"] }, "description": "Fields used to derive bank ID. Controls memory isolation granularity. Default: ['agent', 'channel', 'user'].", - "default": [ - "agent", - "channel", - "user" - ] + "default": ["agent", "channel", "user"] }, "autoRetain": { "type": "boolean", @@ -179,26 +154,15 @@ "type": "array", "items": { "type": "string", - "enum": [ - "user", - "assistant", - "system", - "tool" - ] + "enum": ["user", "assistant", "system", "tool"] }, "description": "Message roles to include in retained transcript. Default: ['user', 'assistant'].", - "default": [ - "user", - "assistant" - ] + "default": ["user", "assistant"] }, "retainFormat": { "type": "string", "description": "Serialization format for retained conversation content. 'json' (default) emits a structured array of {role, content} messages, matching the Claude Code integration. 'text' emits the legacy '[role: x] ... 
[x:end]' markers.", - "enum": [ - "json", - "text" - ], + "enum": ["json", "text"], "default": "json" }, "retainToolCalls": { @@ -226,20 +190,13 @@ "retainDocumentScope": { "type": "string", "description": "Granularity of the retained document_id. 'session' (default) groups all retains under a single document per OpenClaw session. 'turn' produces a new document per retain.", - "enum": [ - "session", - "turn" - ], + "enum": ["session", "turn"], "default": "session" }, "recallBudget": { "type": "string", "description": "Recall effort level. Higher budgets use more retrieval strategies for better results but take longer.", - "enum": [ - "low", - "mid", - "high" - ], + "enum": ["low", "mid", "high"], "default": "mid" }, "recallMaxTokens": { @@ -252,33 +209,19 @@ "type": "array", "items": { "type": "string", - "enum": [ - "world", - "experience", - "observation" - ] + "enum": ["world", "experience", "observation"] }, "description": "Memory types to recall. Defaults to ['observation'] — surfaces only the consolidated, deduplicated view; raw world/experience facts can drive the same answer multiple times when many memories say the same thing.", - "default": [ - "observation" - ] + "default": ["observation"] }, "recallRoles": { "type": "array", "items": { "type": "string", - "enum": [ - "user", - "assistant", - "system", - "tool" - ] + "enum": ["user", "assistant", "system", "tool"] }, "description": "Roles to include when composing contextual recall query. Default: ['user', 'assistant'].", - "default": [ - "user", - "assistant" - ] + "default": ["user", "assistant"] }, "recallContextTurns": { "type": "integer", @@ -310,11 +253,7 @@ }, "recallInjectionPosition": { "type": "string", - "enum": [ - "prepend", - "append", - "user" - ], + "enum": ["prepend", "append", "user"], "description": "Where to inject recalled memories. 
'prepend' = start of system prompt (default), 'append' = end of system prompt (preserves prompt cache), 'user' = before user message.", "default": "prepend" }, @@ -326,13 +265,7 @@ "logLevel": { "type": "string", "description": "Console log verbosity. 'off' = no output, 'error' = errors only, 'warning' = errors + warnings, 'info' = key events + periodic summaries, 'debug' = all details.", - "enum": [ - "off", - "error", - "warning", - "info", - "debug" - ], + "enum": ["off", "error", "warning", "info", "debug"], "default": "info" }, "logSummaryIntervalMs": { diff --git a/hindsight-integrations/openclaw/src/index.test.ts b/hindsight-integrations/openclaw/src/index.test.ts index 5601e8f38..b2865509f 100644 --- a/hindsight-integrations/openclaw/src/index.test.ts +++ b/hindsight-integrations/openclaw/src/index.test.ts @@ -388,14 +388,7 @@ describe("buildRetainRequest", () => { }); it("includes the default retain context guidance", () => { - const request = buildRetainRequest( - "hello world", - 1, - {}, - {}, - 1700000000000, - { turnIndex: 1 } - ); + const request = buildRetainRequest("hello world", 1, {}, {}, 1700000000000, { turnIndex: 1 }); expect(request.context).toBe(DEFAULT_RETAIN_CONTEXT); }); diff --git a/hindsight-integrations/openclaw/src/index.ts b/hindsight-integrations/openclaw/src/index.ts index 56e6532a3..1148025ba 100644 --- a/hindsight-integrations/openclaw/src/index.ts +++ b/hindsight-integrations/openclaw/src/index.ts @@ -36,7 +36,7 @@ export const DEFAULT_RETAIN_CONTEXT = "This content is an AI-assistant conversation transcript from OpenClaw. " + "The [context] block at the beginning of each turn contains routing identifiers: " + "'sender' is an opaque user ID (not a human name), 'channel' is a chat identifier, " + - "'provider' is the messaging platform name (e.g. feishu, slack). " + + "'provider' is the messaging platform name. " + "These are operational routing metadata, not semantic actors or people. 
" + "Messages with role 'assistant' are from the AI assistant; first-person statements " + "in assistant messages refer to the AI, not the human user. " + From 4667d84742efdb599bc93ae2c523ff5217924acd Mon Sep 17 00:00:00 2001 From: Liao Shiwu Date: Thu, 4 Jun 2026 11:25:57 +0800 Subject: [PATCH 3/3] test(openclaw): harden retain context handling --- .../openclaw/src/backfill.test.ts | 3 +- .../openclaw/src/index.test.ts | 36 +++++++++++++++++++ hindsight-integrations/openclaw/src/index.ts | 4 +-- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/hindsight-integrations/openclaw/src/backfill.test.ts b/hindsight-integrations/openclaw/src/backfill.test.ts index 789e77077..92f922b14 100644 --- a/hindsight-integrations/openclaw/src/backfill.test.ts +++ b/hindsight-integrations/openclaw/src/backfill.test.ts @@ -197,8 +197,9 @@ describe("backfill helpers", () => { it("treats a symlinked bin path as direct execution", async () => { const { isDirectExecution } = await import("./backfill.js"); const dir = mkdtempSync(join(tmpdir(), "hindsight-backfill-bin-")); - const modulePath = join(process.cwd(), "dist", "backfill.js"); + const modulePath = join(dir, "backfill.js"); const symlinkPath = join(dir, "hindsight-openclaw-backfill"); + writeFileSync(modulePath, "#!/usr/bin/env node\n", "utf8"); symlinkSync(modulePath, symlinkPath); const moduleUrl = pathToFileURL(modulePath).href; diff --git a/hindsight-integrations/openclaw/src/index.test.ts b/hindsight-integrations/openclaw/src/index.test.ts index b2865509f..3c940edca 100644 --- a/hindsight-integrations/openclaw/src/index.test.ts +++ b/hindsight-integrations/openclaw/src/index.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect } from "vitest"; +import { createRequire } from "module"; import { stripMemoryTags, extractRecallQuery, @@ -29,6 +30,17 @@ import { } from "./index.js"; import type { PluginConfig, MemoryResult, MoltbotPluginAPI } from "./types.js"; +const require = createRequire(import.meta.url); +const 
openclawManifest = require("../openclaw.plugin.json") as { + configSchema?: { + properties?: { + retainContext?: { + default?: string; + }; + }; + }; +}; + // --------------------------------------------------------------------------- // stripMemoryTags // --------------------------------------------------------------------------- @@ -412,6 +424,19 @@ describe("buildRetainRequest", () => { expect(request.context).toBe("Custom extraction guidance."); }); + it("trims configured retain context before sending it", () => { + const request = buildRetainRequest( + "hello world", + 1, + {}, + { retainContext: " Custom extraction guidance. \n" }, + 1700000000000, + { turnIndex: 1 } + ); + + expect(request.context).toBe("Custom extraction guidance."); + }); + it("falls back to per-turn doc id when appendSupported is false (older API)", () => { const request = buildRetainRequest( "hello world", @@ -1538,6 +1563,11 @@ describe("getPluginConfig — retainContext", () => { expect(cfg.retainContext).toBe("Treat IDs as routing metadata."); }); + it("trims an explicit retainContext", () => { + const cfg = getPluginConfig(makeApi({ retainContext: " Treat IDs as routing metadata. 
\n" })); + expect(cfg.retainContext).toBe("Treat IDs as routing metadata."); + }); + it("falls back to the default when retainContext is blank or non-string", () => { expect(getPluginConfig(makeApi({ retainContext: "" })).retainContext).toBe( DEFAULT_RETAIN_CONTEXT @@ -1546,4 +1576,10 @@ describe("getPluginConfig — retainContext", () => { DEFAULT_RETAIN_CONTEXT ); }); + + it("keeps the plugin manifest default in sync with the code default", () => { + expect(openclawManifest.configSchema?.properties?.retainContext?.default).toBe( + DEFAULT_RETAIN_CONTEXT + ); + }); }); diff --git a/hindsight-integrations/openclaw/src/index.ts b/hindsight-integrations/openclaw/src/index.ts index 1148025ba..3feba9272 100644 --- a/hindsight-integrations/openclaw/src/index.ts +++ b/hindsight-integrations/openclaw/src/index.ts @@ -1479,7 +1479,7 @@ export function getPluginConfig(api: MoltbotPluginAPI): PluginConfig { : undefined, retainContext: typeof config.retainContext === "string" && config.retainContext.trim().length > 0 - ? config.retainContext + ? config.retainContext.trim() : DEFAULT_RETAIN_CONTEXT, excludeProviders: Array.isArray(config.excludeProviders) ? Array.from( @@ -2736,7 +2736,7 @@ export function buildRetainRequest( documentId: documentId, context: typeof pluginConfig.retainContext === "string" && pluginConfig.retainContext.trim().length > 0 - ? pluginConfig.retainContext + ? pluginConfig.retainContext.trim() : DEFAULT_RETAIN_CONTEXT, metadata: { retained_at: new Date(now).toISOString(),