Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions packages/agent-core/src/agent/context/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ export class ContextMemory {
toolCalls: [],
origin,
});
// When the user message contains video file references, remind the model
// to use ReadMediaFile instead of writing Python scripts to extract frames.
if (hasVideoTag(content)) {
this.appendSystemReminder(
'The user provided a video file. Use the ReadMediaFile tool to read and analyze the video content directly. Do not write Python scripts or other code to extract frames from the video.',
{ kind: 'injection', variant: 'host' },

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Remove video reminders when undoing their prompt

When a video prompt is later undone, this prompt-scoped reminder is left behind because undo() skips messages whose origin is kind: 'injection'. In the /undo scenario after a user submits <video ...></video>, the real user prompt is removed but this reminder remains in context, so the next model request can still be told that the user provided a video file and should use ReadMediaFile even though that prompt was undone.

Useful? React with 👍 / 👎.

);
}
}

appendSystemReminder(content: string, origin: PromptOrigin): void {
Expand Down Expand Up @@ -307,6 +315,14 @@ export class ContextMemory {
}
}

/**
 * Reports whether any text part of `content` embeds a `<video path="..."></video>` tag.
 *
 * Only parts whose `type` is `'text'` are inspected. A match requires a
 * non-empty, double-quoted `path` attribute as the tag's sole attribute, and
 * nothing but optional whitespace between the opening and closing tags.
 *
 * @param content - The content parts of a message to scan.
 * @returns `true` when at least one text part contains such a tag, otherwise `false`.
 */
function hasVideoTag(content: readonly ContentPart[]): boolean {
  const videoTagPattern = /<video\s+path="[^"]+"\s*>\s*<\/video>/;

  const embedsVideoTag = (part: ContentPart): boolean => {
    // Non-text parts carry no searchable text, so they can never match.
    if (part.type !== 'text') {
      return false;
    }
    return videoTagPattern.test(part.text);
  };

  return content.some(embedsVideoTag);
}

function toolResultOutputForModel(result: ExecutableToolResult): string | ContentPart[] {
const output = result.output;
if (typeof output === 'string') {
Expand Down
40 changes: 40 additions & 0 deletions packages/agent-core/test/agent/context.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,45 @@ describe('Agent context', () => {
await ctx.expectResumeMatches();
});

// A prompt whose text embeds a <video path="..."></video> tag must cause the
// ReadMediaFile reminder to show up in the history of the last recorded LLM call.
it('adds a system reminder when user message contains a video tag', async () => {
  const agent = testAgent();
  agent.configure();
  agent.mockNextResponse({ type: 'text', text: 'got it' });

  await agent.rpc.prompt({
    input: [{ type: 'text', text: '分析这个视频 <video path="/tmp/test.mp4"></video>' }],
  });
  await agent.untilTurnEnd();

  const finalCall = agent.llmCalls.at(-1);
  expect(finalCall).toBeDefined();

  // Flatten every text part of every history message into one string, in order;
  // non-text parts contribute nothing.
  let historyText = '';
  for (const message of finalCall!.history) {
    for (const part of message.content) {
      if (part.type === 'text') {
        historyText += part.text;
      }
    }
  }

  expect(historyText).toContain('The user provided a video file');
  expect(historyText).toContain('ReadMediaFile');
  await agent.expectResumeMatches();
});

// A plain-text prompt without any <video> tag must not put the video reminder
// into the history of the last recorded LLM call.
it('does not add a video reminder when user message has no video tag', async () => {
  const agent = testAgent();
  agent.configure();
  agent.mockNextResponse({ type: 'text', text: 'ok' });

  await agent.rpc.prompt({ input: [{ type: 'text', text: 'hello' }] });
  await agent.untilTurnEnd();

  const finalCall = agent.llmCalls.at(-1);
  expect(finalCall).toBeDefined();

  // Flatten every text part of every history message into one string, in order;
  // non-text parts contribute nothing.
  let historyText = '';
  for (const message of finalCall!.history) {
    for (const part of message.content) {
      if (part.type === 'text') {
        historyText += part.text;
      }
    }
  }

  expect(historyText).not.toContain('The user provided a video file');
  await agent.expectResumeMatches();
});

it('keeps system reminders separate from real user prompts', async () => {
const ctx = testAgent();
ctx.configure();
Expand All @@ -237,6 +276,7 @@ describe('Agent context', () => {
user: text "<system-reminder>\\nRemember the host note.\\n</system-reminder>"
user: text "Real user prompt"
`);
await ctx.expectResumeMatches();
});

it('defers system reminders until pending tool results are recorded and resumed', async () => {
Expand Down
Loading