Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 108 additions & 23 deletions scripts/ci/benchmark-performance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Outputs structured JSON with mean, median, p95, p99 per metric.
*/

import { execSync, ExecSyncOptions } from "child_process";
import { execSync, ExecSyncOptions, spawn, ChildProcess } from "child_process";

// ── Configuration ──────────────────────────────────────────────────

Expand Down Expand Up @@ -159,45 +159,130 @@ function benchmarkHttpsLatency(): BenchmarkResult {
return { metric: "squid_https_latency", unit: "ms", values, ...stats(values) };
}

function benchmarkMemory(): BenchmarkResult {
/**
 * Poll `sudo docker ps` at 500ms intervals until every container in
 * `containerNames` reports status=running, rejecting once `timeoutMs`
 * has elapsed.
 *
 * The docker name filter is anchored (`^name$`) and the returned names
 * are additionally compared for exact equality, so containers with
 * similar names (e.g., "awf-squid-old") never count as a match.
 */
async function waitForContainers(containerNames: string[], timeoutMs: number): Promise<void> {
  const startedAt = Date.now();

  // True when docker reports this exact container name as running.
  const isRunning = (name: string): boolean => {
    const reported = execSync(
      `sudo docker ps --filter name=^${name}$ --filter status=running --format '{{.Names}}' 2>/dev/null`,
      { encoding: "utf-8", timeout: 5_000 }
    )
      .trim()
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean);
    return reported.includes(name);
  };

  for (;;) {
    if (Date.now() - startedAt > timeoutMs) {
      throw new Error(`Containers not running after ${timeoutMs}ms`);
    }
    try {
      if (containerNames.every(isRunning)) {
        return;
      }
    } catch {
      // docker/container not ready yet -- keep polling
    }
    await new Promise<void>((resolve) => setTimeout(resolve, 500));
  }
}

/**
 * Parse a Docker memory usage string (e.g. "123.4MiB / 7.773GiB" from
 * `docker stats --format '{{.MemUsage}}'`) into megabytes (MiB).
 *
 * Only the first quantity in the string is parsed (the usage, not the
 * limit). Recognizes every binary unit docker emits -- B, KiB, MiB,
 * GiB, TiB -- not just the middle three, so tiny ("992B") and huge
 * readings no longer collapse to 0. Returns 0 when no recognizable
 * quantity is present, so a failed `docker stats` call (mapped to
 * "0MiB" by callers) simply contributes nothing to the total.
 */
function parseMb(s: string): number {
  const match = s.match(/([\d.]+)\s*(TiB|GiB|MiB|KiB|B)/i);
  if (!match) return 0;
  const value = parseFloat(match[1]);
  switch (match[2].toLowerCase()) {
    case "tib":
      return value * 1024 * 1024;
    case "gib":
      return value * 1024;
    case "mib":
      return value;
    case "kib":
      return value / 1024;
    default:
      // plain bytes
      return value / (1024 * 1024);
  }
}

/**
* Kill a spawned background process and its entire process group, best-effort.
* Sends SIGTERM then SIGKILL to the process group so descendant processes
* (e.g., sudo, awf, docker) don't survive.
*/
function killBackground(child: ChildProcess): void {
const pid = child.pid;
if (!pid) return;

try {
// SIGTERM the process group to allow graceful shutdown
process.kill(-pid, "SIGTERM");
} catch {
// Process group may have already exited
}

try {
Comment on lines +223 to +230
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

killBackground() sends SIGTERM to the process group but then only SIGKILLs the direct child PID. If the group spawns additional processes that ignore SIGTERM, they can survive. Consider sending SIGKILL to the process group as well (optionally after a short grace period), instead of only killing the child process.

Suggested change
try {
if (child.pid) {
// Kill the process group (negative PID) to catch child processes
process.kill(-child.pid, "SIGTERM");
}
} catch {
// Process may have already exited
}
try {
const pid = child.pid;
if (!pid) {
return;
}
try {
// Kill the process group (negative PID) to catch child processes
process.kill(-pid, "SIGTERM");
} catch {
// Process group may have already exited
}
try {
// Force-kill the entire process group so descendant processes do not survive
process.kill(-pid, "SIGKILL");
} catch {
// Process group may have already exited
}
try {
// Best-effort fallback for the direct child process

Copilot uses AI. Check for mistakes.
// SIGKILL the entire process group to ensure nothing survives
process.kill(-pid, "SIGKILL");
} catch {
// Process group may have already exited
}
}

async function benchmarkMemory(): Promise<BenchmarkResult> {
console.error(" Benchmarking memory footprint...");
const values: number[] = [];

for (let i = 0; i < ITERATIONS; i++) {
cleanup();
// Start containers, measure memory, then stop
let child: ChildProcess | null = null;
try {
// Run a sleep command so containers stay up, then check memory
const output = exec(
`${AWF_CMD} --allow-domains ${ALLOWED_DOMAIN} --log-level error --keep-containers -- ` +
`echo measuring_memory`
// Start awf with a long-running command in the background so containers stay alive.
// Derive spawn args from AWF_CMD to stay consistent with the rest of the script.
const awfParts = AWF_CMD.split(/\s+/);
child = spawn(
awfParts[0],
[...awfParts.slice(1), "--allow-domains", ALLOWED_DOMAIN, "--log-level", "error", "--", "sleep", "30"],
{
Comment on lines +249 to +252
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This path hard-codes sudo + awf in spawn(...) even though the script already defines AWF_CMD = "sudo awf" and uses it elsewhere. To avoid future drift (e.g., if AWF_CMD changes), consider deriving the spawned command/args from AWF_CMD or otherwise reusing that constant.

Copilot uses AI. Check for mistakes.
detached: true,
stdio: "ignore",
}
);
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the child process is spawned with detached: true and stdio: 'ignore', it’s typically a good idea to call child.unref() after spawning. This prevents the parent benchmark script from being kept alive if cleanup/kill logic fails in an edge case.

Suggested change
);
);
child.unref();

Copilot uses AI. Check for mistakes.
// Get memory stats for both containers
// Unref so the parent process won't be kept alive if cleanup fails
child.unref();

// Wait for both containers to be running (up to 30s)
await waitForContainers(["awf-squid", "awf-agent"], 30_000);

// Give containers a moment to stabilize memory usage
await new Promise((resolve) => setTimeout(resolve, 2000));

Comment on lines +260 to +265
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The benchmark proceeds once containers are merely status=running. Since Squid has a Docker healthcheck (and the agent depends on it), sampling memory before Squid is healthy can make the metric noisy and can mask startup failures/restarts. Consider waiting for Squid to be healthy (e.g., via docker ps --filter health=healthy or docker inspect health status) before taking the memory sample.

Copilot uses AI. Check for mistakes.
// Get memory stats while containers are alive
const squidMem = exec(
"sudo docker stats awf-squid --no-stream --format '{{.MemUsage}}' 2>/dev/null || echo '0MiB'"
);
const agentMem = exec(
"sudo docker stats awf-agent --no-stream --format '{{.MemUsage}}' 2>/dev/null || echo '0MiB'"
);

// Parse memory values (format: "123.4MiB / 7.773GiB")
const parseMb = (s: string): number => {
const match = s.match(/([\d.]+)\s*(MiB|GiB|KiB)/i);
if (!match) return 0;
const val = parseFloat(match[1]);
const unit = match[2].toLowerCase();
if (unit === "gib") return val * 1024;
if (unit === "kib") return val / 1024;
return val;
};

const totalMb = Math.round(parseMb(squidMem) + parseMb(agentMem));
values.push(totalMb);
console.error(` Iteration ${i + 1}/${ITERATIONS}: ${totalMb}MB (squid: ${squidMem}, agent: ${agentMem})`);
} catch {
console.error(` Iteration ${i + 1}/${ITERATIONS}: failed (skipped)`);
} catch (err) {
console.error(` Iteration ${i + 1}/${ITERATIONS}: failed (skipped) - ${err}`);
} finally {
// Always clean up the background process and containers
if (child) {
killBackground(child);
}
cleanup();
}
cleanup();
}

if (values.length === 0) {
Expand Down Expand Up @@ -248,7 +333,7 @@ async function main(): Promise<void> {
results.push(benchmarkWarmStart());
results.push(benchmarkColdStart());
results.push(benchmarkHttpsLatency());
results.push(benchmarkMemory());
results.push(await benchmarkMemory());

// Final cleanup
cleanup();
Expand Down
Loading