diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index a437523b9be..37371dadd05 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -92,7 +92,7 @@ jobs: VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1" FLAT_LAYOUT_INLINE_ARRAY_NODE: "1" run: | - bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json + bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl - name: Setup AWS CLI uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6 @@ -105,6 +105,19 @@ jobs: run: | bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json + - name: Ingest results to v3 server + if: vars.V3_INGEST_URL != '' + continue-on-error: true + shell: bash + env: + INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }} + run: | + python3 scripts/post-ingest.py results.v3.jsonl \ + --server "${{ vars.V3_INGEST_URL }}" \ + --commit-sha "${{ github.sha }}" \ + --benchmark-id "${{ matrix.benchmark.id }}" \ + --repo-url "${{ github.server_url }}/${{ github.repository }}" + - name: Alert incident.io if: failure() uses: ./.github/actions/alert-incident-io diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7ce22431213..7da0f8a015a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -358,7 +358,8 @@ jobs: if: matrix.os == 'windows-x64' run: | cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast ` - --exclude vortex-bench --exclude vortex-python --exclude vortex-duckdb ` + --exclude vortex-bench --exclude vortex-bench-server ` + --exclude vortex-python --exclude vortex-duckdb ` --exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-nvcomp ` --exclude vortex-cub --exclude vortex-test-e2e-cuda --exclude duckdb-bench ` --exclude lance-bench --exclude datafusion-bench --exclude random-access-bench ` diff --git a/.github/workflows/publish-bench-server.yml b/.github/workflows/publish-bench-server.yml new file mode 100644 index 00000000000..22aad1135b8 --- /dev/null +++ b/.github/workflows/publish-bench-server.yml @@ -0,0 +1,46 @@ +name: Publish Bench Server + +on: + push: + branches: [develop] + paths: + - "benchmarks-website/server/**" + - "vortex-bench/**" + - "Cargo.lock" + - ".github/workflows/publish-bench-server.yml" + workflow_dispatch: { } + +jobs: + publish: + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: read + packages: write + id-token: write + steps: + - uses: actions/checkout@v6 + + - name: Log in to GHCR + uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + - name: Build and push + uses: docker/build-push-action@v7 + with: + context: . 
+ file: ./benchmarks-website/server/Dockerfile + platforms: linux/arm64 + push: true + tags: | + ghcr.io/${{ github.repository }}/vortex-bench-server:latest + ghcr.io/${{ github.repository }}/vortex-bench-server:${{ github.sha }} diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 8be259fa562..fe77675fcbd 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -376,6 +376,7 @@ jobs: bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \ --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ + --gh-json-v3 results.v3.jsonl \ --no-build \ --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ @@ -395,6 +396,7 @@ jobs: bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \ --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ + --gh-json-v3 results.v3.jsonl \ --no-build \ --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ @@ -499,6 +501,19 @@ jobs: run: | bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json + - name: Ingest results to v3 server + if: inputs.mode == 'develop' && vars.V3_INGEST_URL != '' + continue-on-error: true + shell: bash + env: + INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }} + run: | + python3 scripts/post-ingest.py results.v3.jsonl \ + --server "${{ vars.V3_INGEST_URL }}" \ + --commit-sha "${{ github.sha }}" \ + --benchmark-id "${{ matrix.id }}" \ + --repo-url "${{ github.server_url }}/${{ github.repository }}" + - name: Upload File Sizes if: inputs.mode == 'develop' && matrix.remote_storage == null shell: bash diff --git a/.github/workflows/v3-commit-metadata.yml b/.github/workflows/v3-commit-metadata.yml new file mode 100644 index 00000000000..2ddee9dc84d --- /dev/null +++ b/.github/workflows/v3-commit-metadata.yml @@ -0,0 +1,35 @@ +# Posts a v3 ingest envelope with no records on every push to develop, so the +# `commits` dim stays populated even when no benchmark ran. 
+ +name: v3 commit metadata + +on: + push: + branches: [develop] + workflow_dispatch: { } + +permissions: + contents: read + +jobs: + commit-metadata: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 2 + + - name: Ingest commit metadata to v3 server + if: vars.V3_INGEST_URL != '' + continue-on-error: true + shell: bash + env: + INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }} + run: | + echo -n > empty.jsonl + python3 scripts/post-ingest.py empty.jsonl \ + --server "${{ vars.V3_INGEST_URL }}" \ + --commit-sha "${{ github.sha }}" \ + --benchmark-id "commit-metadata" \ + --repo-url "${{ github.server_url }}/${{ github.repository }}" diff --git a/.gitignore b/.gitignore index 7fa79fb2162..bcc8ef746ee 100644 --- a/.gitignore +++ b/.gitignore @@ -242,3 +242,6 @@ trace*.pb # pytest-benchmark output vortex-python/.benchmarks/ +# For local benchmarks website server and things like the WAL +**.duckdb* +.bench-env diff --git a/Cargo.lock b/Cargo.lock index f9fd5e6e960..7d01aef5ba7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,9 @@ name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "arc-swap" @@ -687,9 +690,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" dependencies = [ "compression-codecs", "compression-core", @@ -900,6 +903,58 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "axum" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "base16ct" version = "1.0.0" @@ -1025,9 +1080,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.4" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" dependencies = [ "arrayref", "arrayvec", @@ -1314,12 +1369,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - [[package]] name = "cexpr" version = "0.6.0" @@ -1606,10 +1655,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.2.2" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ + "crossterm 0.28.1", "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1652,10 +1702,11 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" dependencies = [ + "brotli", "bzip2", "compression-core", "flate2", @@ -1667,9 +1718,9 @@ dependencies = [ [[package]] name = "compression-core" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" [[package]] name = "concurrent-queue" @@ -1910,6 +1961,19 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "crossterm_winapi", + "parking_lot", + "rustix 0.38.44", + "winapi", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -3550,6 +3614,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -3660,6 +3735,25 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab23e69df104e2fd85ee63a533a22d2132ef5975dc6b36f9f3e5a7305e4a8ed7" +[[package]] +name = "duckdb" +version = "1.10502.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdc796383b176dd5a45353fbb5e64583c0ee4da12cb62c9e510b785324b2488" +dependencies = [ + "arrow 58.1.0", + "cast", + "comfy-table", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "num", + "num-integer", + "rust_decimal", + "strum 0.27.2", +] + [[package]] name = "duckdb-bench" version = "0.1.0" @@ -3861,6 +3955,18 @@ dependencies = [ "ext-trait", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fast-float2" version = "0.2.3" @@ -4356,6 +4462,15 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashlink" +version = "0.10.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "heck" version = "0.5.0" @@ -4428,6 +4543,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humansize" version = "2.1.3" @@ -4445,9 +4566,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hybrid-array" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" dependencies = [ "typenum", ] @@ -4466,6 +4587,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -4487,6 +4609,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -4654,9 +4777,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -4891,22 +5014,6 @@ dependencies = [ "jiff-tzdb", ] -[[package]] -name = "jni" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" -dependencies = [ - "cesu8", - "cfg-if", - "combine", - "jni-sys 0.3.1", - "log", - "thiserror 1.0.69", - "walkdir", - "windows-sys 0.45.0", -] - [[package]] name = "jni" version = "0.22.4" @@ -4917,7 +5024,7 @@ dependencies = [ "combine", "java-locator", "jni-macros", - "jni-sys 0.4.1", + "jni-sys", "libloading 0.8.9", "log", "simd_cesu8", @@ -4939,15 +5046,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "jni-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" -dependencies = [ - "jni-sys 0.4.1", -] - [[package]] name = "jni-sys" version = "0.4.1" @@ -5634,9 +5732,26 @@ checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libduckdb-sys" +version = "1.10502.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7401630ae2abcff642f7156294289e50f2d222e061c026ad797b01bf20c215" +dependencies = [ + "cc", + "flate2", + "pkg-config", + "reqwest 0.12.28", + "serde", + "serde_json", + "tar", + "vcpkg", + "zip 6.0.0", +] [[package]] name = "libfuzzer-sys" @@ -5898,6 +6013,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.8.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "matrixmultiply" version = "0.3.10" @@ -5911,6 +6032,30 @@ dependencies = [ "thread-tree", ] +[[package]] +name = "maud" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8156733e27020ea5c684db5beac5d1d611e1272ab17901a49466294b84fc217e" +dependencies = [ + "axum-core", + "http", + "itoa", + "maud_macros", +] + +[[package]] +name = "maud_macros" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7261b00f3952f617899bc012e3dbd56e4f0110a038175929fa5d18e5a19913ca" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.117", +] + [[package]] name = "md-5" version = "0.10.6" @@ -6221,6 +6366,20 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -6256,6 +6415,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -7064,6 +7245,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "version_check", +] + [[package]] name = "prost" version = "0.12.6" @@ -7595,7 +7788,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" dependencies = [ "cfg-if", - "crossterm", + "crossterm 0.29.0", "instability", "ratatui-core", ] @@ -7834,13 +8027,14 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams 0.4.2", "web-sys", + "webpki-roots", ] [[package]] name = "reqwest" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" dependencies = [ "base64", "bytes", @@ -7864,6 +8058,8 @@ dependencies = [ "rustls", "rustls-pki-types", "rustls-platform-verifier", + "serde", + "serde_json", "sync_wrapper", "tokio", "tokio-rustls", @@ -8057,9 +8253,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.38" +version = "0.23.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" +checksum = "7c2c118cb077cca2822033836dfb1b975355dfb784b5e8da48f7b6c5db74e60e" dependencies = 
[ "aws-lc-rs", "once_cell", @@ -8084,9 +8280,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -8094,13 +8290,13 @@ dependencies = [ [[package]] name = "rustls-platform-verifier" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" dependencies = [ "core-foundation 0.10.1", "core-foundation-sys", - "jni 0.21.1", + "jni", "log", "once_cell", "rustls", @@ -8339,6 +8535,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -9247,7 +9454,7 @@ dependencies = [ "chrono", "num_cpus", "ping", - "reqwest 0.13.2", + "reqwest 0.13.3", "sysinfo", "test-with-derive", "uzers", @@ -9268,7 +9475,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "syn 2.0.117", "sysinfo", "uzers", @@ -9609,6 +9816,7 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9632,6 +9840,7 @@ dependencies = [ "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9967,6 +10176,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vector-search-bench" version = "0.1.0" @@ -10157,6 +10372,7 @@ dependencies = [ "glob", "humansize", "indicatif", + "insta", "itertools 0.14.0", "mimalloc", "noodles-bgzf", @@ -10165,7 +10381,7 @@ dependencies = [ "parquet 58.1.0", "rand 0.10.1", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sysinfo", @@ -10185,6 +10401,54 @@ dependencies = [ "vortex-tensor", ] +[[package]] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "arrow-array 58.1.0", + "arrow-buffer 58.1.0", + "arrow-schema 58.1.0", + "clap", + "duckdb", + "flate2", + "reqwest 0.13.3", + "rstest", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "tracing-subscriber", + "vortex-bench-server", + "vortex-utils", +] + +[[package]] +name = "vortex-bench-server" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "axum", + "base64", + "duckdb", + "flate2", + "insta", + "maud", + "reqwest 0.13.3", + "serde", + "serde_json", + "subtle", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tower", + "tower-http", + "tracing", + "tracing-subscriber", + "twox-hash", +] + [[package]] name = "vortex-btrblocks" version = "0.1.0" @@ -10261,7 +10525,7 @@ dependencies = [ "clap", "futures", "parquet 58.1.0", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sha2 0.11.0", @@ -10443,7 +10707,7 @@ dependencies = [ "object_store 
0.13.2", "parking_lot", "paste", - "reqwest 0.13.2", + "reqwest 0.13.3", "rstest", "tempfile", "tracing", @@ -10453,7 +10717,7 @@ dependencies = [ "vortex-runend", "vortex-sequence", "vortex-utils", - "zip", + "zip 8.6.0", ] [[package]] @@ -10665,7 +10929,7 @@ dependencies = [ "arrow-array 58.1.0", "arrow-schema 58.1.0", "futures", - "jni 0.22.4", + "jni", "object_store 0.13.2", "parking_lot", "thiserror 2.0.18", @@ -10746,7 +11010,7 @@ dependencies = [ "bindgen", "libloading 0.8.9", "liblzma", - "reqwest 0.13.2", + "reqwest 0.13.3", "tar", "vortex-cuda-macros", ] @@ -10957,7 +11221,7 @@ dependencies = [ "arrow-schema 58.1.0", "clap", "console_error_panic_hook", - "crossterm", + "crossterm 0.29.0", "datafusion 53.1.0", "env_logger", "flatbuffers", @@ -11090,6 +11354,7 @@ dependencies = [ "cfg-if", "once_cell", "rustversion", + "serde", "wasm-bindgen-macro", "wasm-bindgen-shared", ] @@ -11225,6 +11490,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "8.0.2" @@ -11377,15 +11651,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -11422,21 +11687,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -11479,12 +11729,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -11497,12 +11741,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -11515,12 +11753,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -11545,12 +11777,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -11563,12 +11789,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -11581,12 +11801,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -11599,12 +11813,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -11902,6 +12110,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap", + "memchr", + "zopfli", +] + [[package]] name = "zip" version = "8.6.0" diff --git a/Cargo.toml b/Cargo.toml index 6ca4281c8a9..4126243aa8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,9 @@ members = [ "benchmarks/duckdb-bench", "benchmarks/random-access-bench", "benchmarks/vector-search-bench", + # Benchmarks website v3 (alpha) - leaf binary, not part of vortex-* API + "benchmarks-website/server", + "benchmarks-website/migrate", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" diff --git a/REUSE.toml b/REUSE.toml index 161f6e3086a..8e406c95c90 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -36,7 +36,7 @@ SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "CC-BY-4.0" [[annotations]] -path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties"] +path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties", "**.duckdb*"] precedence = "override" SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "Apache-2.0" diff --git a/_typos.toml b/_typos.toml index 62c3b0d6358..e2a5ff330b8 100644 --- a/_typos.toml +++ b/_typos.toml @@ -8,7 +8,7 @@ extend-ignore-re = [ ] [files] -extend-exclude = ["/vortex-bench/**", "/docs/references.bib", 
"benchmarks/**", "vortex-sqllogictest/slt/**", "encodings/fsst/src/dfa/tests.rs", "encodings/fsst/src/dfa/flat_contains.rs"] +extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**", "vortex-sqllogictest/slt/**", "encodings/fsst/src/dfa/tests.rs", "encodings/fsst/src/dfa/flat_contains.rs", "benchmarks-website/server/static/**", "benchmarks-website/server/tests/snapshots/**"] [type.py] extend-ignore-identifiers-re = [ diff --git a/bench-orchestrator/bench_orchestrator/cli.py b/bench-orchestrator/bench_orchestrator/cli.py index d497d85ed13..6c200015182 100644 --- a/bench-orchestrator/bench_orchestrator/cli.py +++ b/bench-orchestrator/bench_orchestrator/cli.py @@ -210,6 +210,10 @@ def run( Path | None, typer.Option("--output", help="Optional path for compatibility JSONL output"), ] = None, + gh_json_v3: Annotated[ + Path | None, + typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"), + ] = None, options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None, ) -> None: """Run benchmarks with specified configuration.""" @@ -294,6 +298,7 @@ def run( sample_rate=sample_rate, tracing=tracing, runner=runner, + gh_json_v3=gh_json_v3, on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: ( write_result_line( line, diff --git a/bench-orchestrator/bench_orchestrator/runner/executor.py b/bench-orchestrator/bench_orchestrator/runner/executor.py index b895afdc2e1..32ed9c91132 100644 --- a/bench-orchestrator/bench_orchestrator/runner/executor.py +++ b/bench-orchestrator/bench_orchestrator/runner/executor.py @@ -40,6 +40,7 @@ def build_command( sample_rate: int | None = None, tracing: bool = False, runner: str | None = None, + gh_json_v3: Path | None = None, ) -> list[str]: """Build the command used to execute a benchmark binary.""" cmd = [ @@ -67,6 +68,8 @@ def build_command( cmd.append("--tracing") if runner: cmd.extend(["--runner", runner]) + if gh_json_v3 is not None: + cmd.extend(["--gh-json-v3", str(gh_json_v3)]) if options: for key, value in options.items(): cmd.extend(["--opt", f"{key}={value}"]) @@ -98,6 +101,7 @@ def run( sample_rate: int | None = None, tracing: bool = False, runner: str | None = None, + gh_json_v3: Path | None = None, on_result: Callable[[str], None] | None = None, ) -> list[str]: """ @@ -128,6 +132,7 @@ def run( sample_rate=sample_rate, tracing=tracing, runner=runner, + gh_json_v3=gh_json_v3, ) if self.verbose: diff --git a/bench-orchestrator/tests/test_executor.py b/bench-orchestrator/tests/test_executor.py index ade3dde1a67..dd3253a22ff 100644 --- a/bench-orchestrator/tests/test_executor.py +++ b/bench-orchestrator/tests/test_executor.py @@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None: assert "1,3" in cmd +def test_build_command_includes_gh_json_v3_when_set() -> None: + executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB) + + cmd = executor.build_command( + benchmark=Benchmark.TPCH, + formats=[Format.PARQUET], + gh_json_v3=Path("results.v3.jsonl"), + ) + + assert "--gh-json-v3" in cmd + flag_idx = cmd.index("--gh-json-v3") + assert cmd[flag_idx + 1] == "results.v3.jsonl" + + +def test_build_command_omits_gh_json_v3_when_unset() -> None: + executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB) + + cmd = executor.build_command( + benchmark=Benchmark.TPCH, + formats=[Format.PARQUET], + ) + + assert "--gh-json-v3" not in cmd + + def 
test_run_streams_logs_without_counting_them(tmp_path: Path) -> None: script = tmp_path / "fake-bench.py" script.write_text( diff --git a/benchmarks-website/docker-compose.yml b/benchmarks-website/docker-compose.yml index 4c2e9682329..b97482a230a 100644 --- a/benchmarks-website/docker-compose.yml +++ b/benchmarks-website/docker-compose.yml @@ -5,6 +5,20 @@ services: - "80:3000" restart: unless-stopped + vortex-bench-server: + image: ghcr.io/vortex-data/vortex/vortex-bench-server:latest + ports: + - "3001:3000" + environment: + VORTEX_BENCH_DB: "/app/data/bench.duckdb" + VORTEX_BENCH_BIND: "0.0.0.0:3000" + VORTEX_BENCH_LOG: "info,vortex_bench_server=debug" + env_file: + - /etc/vortex-bench/secrets.env + volumes: + - /opt/benchmarks-website/data:/app/data + restart: unless-stopped + watchtower: image: containrrr/watchtower volumes: diff --git a/benchmarks-website/ec2-init.txt b/benchmarks-website/ec2-init.txt index 1c2459b3bee..4e1377cc014 100644 --- a/benchmarks-website/ec2-init.txt +++ b/benchmarks-website/ec2-init.txt @@ -14,4 +14,57 @@ sudo mkdir -p /opt/benchmarks-website sudo cp docker-compose.yml /opt/benchmarks-website/ cd /opt/benchmarks-website - docker compose up -d \ No newline at end of file + docker compose up -d + + ==================================================================== + v3 (vortex-bench-server) — additive setup, runs alongside v2 + ==================================================================== + + v2 stays on port 80 until DNS is flipped. v3 runs on port 3001 from + the same docker-compose.yml on this host. + + 4. Create the bearer-token env file (root:root, mode 600) + sudo mkdir -p /etc/vortex-bench + sudo install -m 600 -o root -g root /dev/null /etc/vortex-bench/secrets.env + # Edit and set INGEST_BEARER_TOKEN=: + sudo vi /etc/vortex-bench/secrets.env + # File contents: + # INGEST_BEARER_TOKEN= + + 5. Create the EBS-backed DuckDB data directory + # Assumes an EBS volume is already mounted at /opt/benchmarks-website/data. + sudo mkdir -p /opt/benchmarks-website/data + sudo chown root:root /opt/benchmarks-website/data + sudo chmod 755 /opt/benchmarks-website/data + + 6. Pull and start v3 (watchtower already polls ghcr.io for refreshes) + cd /opt/benchmarks-website + docker compose pull vortex-bench-server + docker compose up -d vortex-bench-server + # Smoke-check on the host: + curl -sf http://127.0.0.1:3001/health || echo "v3 not responding" + + 7. Install the daily DuckDB backup cron + # Copy the backup script from the repo checkout to a stable location. + sudo install -m 755 -o root -g root \ + benchmarks-website/server/scripts/backup.sh \ + /usr/local/bin/vortex-bench-backup.sh + # Cron entry: 06:00 UTC daily, after the nightly bench finishes. + sudo tee /etc/cron.d/vortex-bench-backup >/dev/null <<'CRON' + 0 6 * * * root /usr/local/bin/vortex-bench-backup.sh >> /var/log/vortex-bench-backup.log 2>&1 + CRON + sudo chmod 644 /etc/cron.d/vortex-bench-backup + # The instance IAM role already permits writes to + # s3://vortex-ci-benchmark-results/ (same role v2's cat-s3.sh uses). + + 8. Bearer-token rotation procedure + # When rotating INGEST_BEARER_TOKEN: + # a. Generate a new token (e.g. `openssl rand -hex 32`). + # b. Update the GitHub Actions Environment secret INGEST_BEARER_TOKEN + # so CI dual-writes use the new value. + # c. 
On this EC2 host, edit the env file and restart only the v3 + # container so v2 traffic on port 80 is unaffected: + # sudo vi /etc/vortex-bench/secrets.env + # cd /opt/benchmarks-website + # docker compose up -d --force-recreate vortex-bench-server + # d. Verify with `curl` against /health and a token-gated endpoint. \ No newline at end of file diff --git a/benchmarks-website/migrate/Cargo.toml b/benchmarks-website/migrate/Cargo.toml new file mode 100644 index 00000000000..45a752df397 --- /dev/null +++ b/benchmarks-website/migrate/Cargo.toml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +[package] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +edition = "2024" +rust-version = "1.91.0" +license = "Apache-2.0" +description = "One-shot historical migrator from the v2 benchmarks S3 dataset to a v3 DuckDB file" +publish = false + +[[bin]] +name = "vortex-bench-migrate" +path = "src/main.rs" + +# Throwaway binary, not part of the vortex-* public API surface. +# Errors use anyhow, and the crate is intentionally outside the +# workspace public-api lockfile set. + +[dependencies] +anyhow = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } +clap = { workspace = true, features = ["derive"] } +# track vortex-duckdb's bundled engine version (build.rs) +duckdb = { version = "1.10502", features = ["bundled", "appender-arrow"] } +flate2 = "1.1" +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +tracing = { workspace = true, features = ["std"] } +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } +vortex-bench-server = { path = "../server" } +vortex-utils = { workspace = true } + +[dev-dependencies] +rstest = { workspace = true } +tempfile = { workspace = true } diff --git a/benchmarks-website/migrate/build.rs b/benchmarks-website/migrate/build.rs new file mode 100644 index 00000000000..37bb34d013a --- /dev/null +++ b/benchmarks-website/migrate/build.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +fn main() { + if std::env::var("CARGO_CFG_TARGET_OS").as_deref() == Ok("windows") { + println!("cargo:rustc-link-lib=dylib=rstrtmgr"); + } +} diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs new file mode 100644 index 00000000000..8e1c1e2a110 --- /dev/null +++ b/benchmarks-website/migrate/src/classifier.rs @@ -0,0 +1,853 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Bug-for-bug port of v2's `getGroup`, `formatQuery`, and +//! `normalizeChartName` from `benchmarks-website/server.js`, plus the +//! mapping from v2 group + name pattern to a v3 fact-table bin. +//! +//! The v2 classifier was the source of truth for what historical +//! records mean. It groups records by name prefix into one of: +//! "Random Access", "Compression", "Compression Size", or one of the +//! SQL query suites (with optional fan-out by storage and scale +//! factor for TPC-H/TPC-DS). This module reproduces that logic and +//! then hops to a v3 fact-table bin, since v3 stores dim values as +//! columns instead of name fragments. +//! +//! 
Engine and format strings stored in v3 columns are pulled from the +//! raw, pre-rename v2 record name. v2's `ENGINE_RENAMES` was a v2 +//! read-time UI concern (e.g. `vortex-file-compressed` rendered as +//! `vortex` and `parquet-tokio-local-disk` rendered as `parquet-nvme`). +//! v3 stores canonical `Format::name()` strings to match what the v3 +//! live emitter writes, so historical and live records share series. + +use crate::v2::V2Record; +use crate::v2::dataset_scale_factor; + +/// Static port of v2's `QUERY_SUITES`. +pub const QUERY_SUITES: &[QuerySuite] = &[ + QuerySuite { + prefix: "clickbench", + display_name: "Clickbench", + query_prefix: "CLICKBENCH", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "statpopgen", + display_name: "Statistical and Population Genetics", + query_prefix: "STATPOPGEN", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "polarsignals", + display_name: "PolarSignals Profiling", + query_prefix: "POLARSIGNALS", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "gharchive", + display_name: "GhArchive", + query_prefix: "GHARCHIVE", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "tpch", + display_name: "TPC-H", + query_prefix: "TPC-H", + dataset_key: Some("tpch"), + fan_out: true, + skip: false, + }, + QuerySuite { + prefix: "tpcds", + display_name: "TPC-DS", + query_prefix: "TPC-DS", + dataset_key: Some("tpcds"), + fan_out: true, + skip: false, + }, + QuerySuite { + prefix: "fineweb", + display_name: "Fineweb", + query_prefix: "FINEWEB", + dataset_key: None, + fan_out: false, + skip: false, + }, +]; + +/// Static port of v2's `ENGINE_RENAMES`. Applied to the "series" half +/// of a benchmark name (the part after the first `/`) before splitting +/// on `:` into engine/format. Order doesn't matter — keys are unique. +const ENGINE_RENAMES: &[(&str, &str)] = &[ + ("datafusion:vortex-file-compressed", "datafusion:vortex"), + ("datafusion:parquet", "datafusion:parquet"), + ("datafusion:arrow", "datafusion:in-memory-arrow"), + ("datafusion:lance", "datafusion:lance"), + ("datafusion:vortex-compact", "datafusion:vortex-compact"), + ("duckdb:vortex-file-compressed", "duckdb:vortex"), + ("duckdb:parquet", "duckdb:parquet"), + ("duckdb:duckdb", "duckdb:duckdb"), + ("duckdb:vortex-compact", "duckdb:vortex-compact"), + ("vortex-tokio-local-disk", "vortex-nvme"), + ("vortex-compact-tokio-local-disk", "vortex-compact-nvme"), + ("lance-tokio-local-disk", "lance-nvme"), + ("parquet-tokio-local-disk", "parquet-nvme"), + ("lance", "lance"), +]; + +/// One entry of `QUERY_SUITES`. +#[derive(Debug, Clone, Copy)] +pub struct QuerySuite { + pub prefix: &'static str, + pub display_name: &'static str, + pub query_prefix: &'static str, + pub dataset_key: Option<&'static str>, + pub fan_out: bool, + pub skip: bool, +} + +/// Group a v2 record falls into. Mirrors `getGroup` in `server.js`, +/// including the fan-out group naming for TPC-H/TPC-DS. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum V2Group { + RandomAccess, + Compression, + CompressionSize, + Query { + suite_index: usize, + /// `Some` for fan-out suites only. + storage: Option, + /// `Some` for fan-out suites only. + scale_factor: Option, + }, +} + +impl V2Group { + /// Display name as v2 served it from `/api/metadata`. 
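+    /// For example, `RandomAccess` renders as `"Random Access"`, a
+    /// non-fan-out suite renders as its `display_name` (e.g.
+    /// `"Clickbench"`), and a fan-out suite with storage `"NVMe"` and
+    /// scale factor `"10"` renders as `"TPC-H (NVMe) (SF=10)"`.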
+ pub fn display_name(&self) -> String { + match self { + V2Group::RandomAccess => "Random Access".into(), + V2Group::Compression => "Compression".into(), + V2Group::CompressionSize => "Compression Size".into(), + V2Group::Query { + suite_index, + storage, + scale_factor, + } => { + let suite = &QUERY_SUITES[*suite_index]; + if let (Some(storage), Some(sf)) = (storage, scale_factor) { + format!("{} ({}) (SF={})", suite.display_name, storage, sf) + } else { + suite.display_name.to_string() + } + } + } + } +} + +/// Apply v2's `ENGINE_RENAMES`. Reproduces the JS `rename`: +/// `RENAMES[s.toLowerCase()] || RENAMES[s] || s`. +pub fn rename_engine(s: &str) -> String { + let lower = s.to_lowercase(); + for (k, v) in ENGINE_RENAMES { + if *k == lower { + return (*v).to_string(); + } + } + for (k, v) in ENGINE_RENAMES { + if *k == s { + return (*v).to_string(); + } + } + s.to_string() +} + +/// Faithful port of v2's `formatQuery`: maps `clickbench_q07` → +/// `"CLICKBENCH Q7"`. Returns the original (uppercased, +/// `-` and `_` replaced with spaces) when no suite matches. +pub fn format_query(q: &str) -> String { + let lower = q.to_lowercase(); + for suite in QUERY_SUITES { + if suite.skip { + continue; + } + let prefix = suite.prefix; + if let Some(rest) = lower.strip_prefix(prefix) + && let Some(idx) = parse_query_index(rest) + { + return format!("{} Q{}", suite.query_prefix, idx); + } + } + let mut out = q.to_uppercase(); + out = out.replace(['_', '-'], " "); + out +} + +/// Parse the `_q07` / ` q7` / `q42` tail used by `format_query`. +/// Returns the integer query index if the tail matches the v2 regex +/// `^[_ ]?q(\d+)`. +fn parse_query_index(rest: &str) -> Option { + let after_sep = rest + .strip_prefix('_') + .or_else(|| rest.strip_prefix(' ')) + .unwrap_or(rest); + let after_q = after_sep + .strip_prefix('q') + .or_else(|| after_sep.strip_prefix('Q'))?; + let digits: String = after_q.chars().take_while(|c| c.is_ascii_digit()).collect(); + if digits.is_empty() { + return None; + } + digits.parse().ok() +} + +/// Faithful port of v2's `normalizeChartName`. +pub fn normalize_chart_name(group: &V2Group, chart_name: &str) -> String { + if matches!(group, V2Group::CompressionSize) && chart_name == "VORTEX FILE COMPRESSED SIZE" { + return "VORTEX SIZE".into(); + } + chart_name.to_string() +} + +/// Port of v2's `getGroup`. Returns `None` for skipped suites +/// (e.g. `fineweb`) or names that match nothing. 
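+/// For example (assuming `dataset_scale_factor` reports SF=10 for the
+/// record's dataset), a record named `"tpch_q01/duckdb:parquet"` whose
+/// `storage` field is `"s3"` (any case) classifies as
+/// `Query { storage: Some("S3"), scale_factor: Some("10"), .. }`, while
+/// `"compress time/taxi"` classifies as `Compression`.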
+pub fn get_group(record: &V2Record) -> Option { + let lower = record.name.to_lowercase(); + + if lower.starts_with("random-access/") || lower.starts_with("random access/") { + return Some(V2Group::RandomAccess); + } + + if lower.starts_with("vortex size/") + || lower.starts_with("vortex-file-compressed size/") + || lower.starts_with("parquet size/") + || lower.starts_with("parquet-zstd size/") + || lower.starts_with("lance size/") + || lower.contains(":raw size/") + || lower.contains(":parquet-zstd size/") + || lower.contains(":lance size/") + { + return Some(V2Group::CompressionSize); + } + + if lower.starts_with("compress time/") + || lower.starts_with("decompress time/") + || lower.starts_with("parquet_rs-zstd compress") + || lower.starts_with("parquet_rs-zstd decompress") + || lower.starts_with("lance compress") + || lower.starts_with("lance decompress") + || lower.starts_with("vortex:lance ratio") + || lower.starts_with("vortex:parquet-zstd ratio") + // Typo'd v2 emitter wrote `parquet-zst` (no `d`) for some + // ratio records; match both spellings so they classify as + // derived ratios instead of falling through to Unknown. + || lower.starts_with("vortex:parquet-zst ratio") + || lower.starts_with("vortex:raw ratio") + { + return Some(V2Group::Compression); + } + + for (i, suite) in QUERY_SUITES.iter().enumerate() { + let prefix_q = format!("{}_q", suite.prefix); + let prefix_slash = format!("{}/", suite.prefix); + if !lower.starts_with(&prefix_q) && !lower.starts_with(&prefix_slash) { + continue; + } + if suite.skip { + return None; + } + if !suite.fan_out { + return Some(V2Group::Query { + suite_index: i, + storage: None, + scale_factor: None, + }); + } + let storage = match record.storage.as_deref().map(str::to_uppercase).as_deref() { + Some("S3") => "S3", + _ => "NVMe", + }; + let dataset_key = suite.dataset_key.unwrap_or(suite.prefix); + let raw_sf = record + .dataset + .as_ref() + .and_then(|d| dataset_scale_factor(d, dataset_key)); + let sf = raw_sf + .as_deref() + .and_then(|s| s.parse::().ok()) + .map(|f| f.round() as i64) + .unwrap_or(1); + return Some(V2Group::Query { + suite_index: i, + storage: Some(storage.into()), + scale_factor: Some(sf.to_string()), + }); + } + + None +} + +/// Group + chart + series breakdown for a v2 record, using the same +/// rules `server.js` applies in `refresh()`. Equivalent to v2's +/// `(group, chartName, seriesName)` triple after rename / skip rules. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct V2Classification { + pub group: V2Group, + pub chart: String, + pub series: String, +} + +/// Apply the same chart / series naming v2's `refresh()` does, plus +/// the throughput / `PARQUET-UNC` skip rules. 
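+/// For example, `"clickbench_q07/datafusion:vortex-file-compressed"`
+/// classifies with chart `"CLICKBENCH Q7"` and series
+/// `"datafusion:vortex"` (after `ENGINE_RENAMES`), while any name
+/// containing `" throughput"` returns `None`.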
+pub fn classify_v2(record: &V2Record) -> Option { + if record.name.contains(" throughput") { + return None; + } + let group = get_group(record)?; + let parts: Vec<&str> = record.name.split('/').collect(); + let (chart, series) = match (&group, parts.len()) { + (V2Group::RandomAccess, 4) => { + let chart = format!("{}/{}", parts[1], parts[2]) + .to_uppercase() + .replace(['_', '-'], " "); + let series = rename_engine(if parts[3].is_empty() { + "default" + } else { + parts[3] + }); + (chart, series) + } + (V2Group::RandomAccess, 2) => ( + "RANDOM ACCESS".to_string(), + rename_engine(if parts[1].is_empty() { + "default" + } else { + parts[1] + }), + ), + (V2Group::RandomAccess, _) => return None, + _ => { + let series_raw = if parts.len() >= 2 && !parts[1].is_empty() { + parts[1] + } else { + "default" + }; + let series = rename_engine(series_raw); + let chart = format_query(parts[0]); + (chart, series) + } + }; + let chart = normalize_chart_name(&group, &chart); + if chart.contains("PARQUET-UNC") { + return None; + } + Some(V2Classification { + group, + chart, + series, + }) +} + +/// Mapping target: which v3 fact table a v2 record lands in, plus the +/// dim values that table needs. +#[derive(Debug, Clone, PartialEq)] +pub enum V3Bin { + Query { + dataset: String, + dataset_variant: Option, + scale_factor: Option, + query_idx: i32, + storage: String, + engine: String, + format: String, + }, + CompressionTime { + dataset: String, + dataset_variant: Option, + format: String, + op: String, + }, + CompressionSize { + dataset: String, + dataset_variant: Option, + format: String, + }, + RandomAccess { + dataset: String, + format: String, + }, +} + +/// Top-level entry point. Combines `classify_v2` with the v3 fact-table +/// mapping. Returns `None` for records that: +/// +/// - Don't match any v2 group (uncategorized prefix). +/// - Are explicitly skipped by v2 (throughput, PARQUET-UNC, fineweb). +/// - Are computed-at-read-time ratios that v3 derives from +/// `compression_sizes` (`vortex:parquet-zstd ratio …`, +/// `vortex:lance ratio …`, `vortex:raw ratio …`, +/// `vortex:* size/…`). +pub fn classify(record: &V2Record) -> Option { + let cls = classify_v2(record)?; + match &cls.group { + V2Group::RandomAccess => bin_random_access(record), + V2Group::Compression => bin_compression_time(&cls, record), + V2Group::CompressionSize => bin_compression_size(&cls, record), + V2Group::Query { .. } => bin_query(&cls, record), + } +} + +/// Reason the classifier dropped a record. Intentional skips (v2 +/// patterns v3 deliberately doesn't store) are NOT errors; they don't +/// count against the uncategorized gate. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Skip { + /// `vortex:* ratio …` and `vortex:* size` — derived in v3 from + /// `compression_sizes` joined to itself. + DerivedRatio, + /// `throughput` records — v2 derived these from latencies. + Throughput, + /// A v2 query suite marked `skip: true` in QUERY_SUITES. + SkippedSuite, + /// random-access record with an unsupported part count. + UnsupportedShape, + /// Record had no `value` field. + NoValue, + /// Dim outside the v3 emitter's allowlist (e.g. `parquet-zstd`, + /// historical-only suites no longer in CI). + Deprecated, + /// v2 memory measurements (`*_memory/*` records). Carry top-level + /// `peak_physical_memory` / `peak_virtual_memory` / + /// `physical_memory_delta` / `virtual_memory_delta` fields that + /// `V2Record` doesn't deserialize. 
Not migrated for alpha; merging + /// into the corresponding QueryMeasurement row is future work. + HistoricalMemory, +} + +/// Engines the v3 emitter produces today. Mirrors +/// `vortex-bench/src/lib.rs::Engine`. Anything else is historical and gets +/// bucketed as `Skip::Deprecated`. +const V3_ENGINES: &[&str] = &["datafusion", "duckdb", "vortex", "arrow"]; + +/// Formats the v3 emitter produces today (`Format::name()` values from +/// `vortex-bench/src/lib.rs`). +const V3_FORMATS: &[&str] = &[ + "vortex-file-compressed", + "vortex-compact", + "parquet", + "lance", + "csv", + "arrow", + "duckdb", +]; + +/// Query suites the v3 CI runs today. Suites outside this list still +/// classify (so historical analyses stay coherent) but get bucketed +/// as `Skip::Deprecated` so they don't render as orphan charts in v3. +/// +/// `fineweb` is included because `.github/workflows/sql-benchmarks.yml` +/// still has `fineweb` and `fineweb-s3` matrix entries. `gharchive` +/// stays excluded — it's defined in `vortex-bench` but no current +/// workflow runs it. +const V3_QUERY_SUITES: &[&str] = &[ + "clickbench", + "tpch", + "tpcds", + "statpopgen", + "polarsignals", + "fineweb", +]; + +/// Returns true if every dim that v3 stores as a column is on the +/// emitter's current allowlist. Dim values outside the allowlist mean +/// historical-only formats / engines that the v3 UI has nothing to +/// render against. +fn is_v3_dim(bin: &V3Bin) -> bool { + match bin { + V3Bin::Query { engine, format, .. } => { + V3_ENGINES.contains(&engine.as_str()) && V3_FORMATS.contains(&format.as_str()) + } + V3Bin::CompressionTime { format, .. } + | V3Bin::CompressionSize { format, .. } + | V3Bin::RandomAccess { format, .. } => V3_FORMATS.contains(&format.as_str()), + } +} + +/// Outcome of running the classifier on a v2 record. Distinguishes +/// "we know we don't want this" (`Skip`) from "we don't recognize this" +/// (`Unknown`); the migrator's 5% gate fires only on the latter. +#[derive(Debug, Clone)] +pub enum Outcome { + Bin(V3Bin), + Skip(Skip), + Unknown, +} + +/// Like [`classify`], but reports *why* a record was dropped. Intended +/// for the migrator so the 5% uncategorized gate doesn't trip on +/// records v2 deliberately doesn't render (ratios, throughput, +/// skipped suites). +pub fn classify_outcome(record: &V2Record) -> Outcome { + if record.name.contains(" throughput") { + return Outcome::Skip(Skip::Throughput); + } + // v2 memory records: e.g. "clickbench_q07_memory/datafusion:parquet". + // Match the `_memory/` infix BEFORE the engine/format split, so they + // route to a known Skip variant instead of slipping through to + // Outcome::Unknown and tripping the 5% gate. + let lower = record.name.to_lowercase(); + if let Some((head, _)) = lower.split_once('/') + && head.ends_with("_memory") + { + return Outcome::Skip(Skip::HistoricalMemory); + } + let Some(group) = get_group(record) else { + return Outcome::Unknown; + }; + if let V2Group::Query { suite_index, .. } = &group + && QUERY_SUITES[*suite_index].skip + { + return Outcome::Skip(Skip::SkippedSuite); + } + let Some(cls) = classify_v2(record) else { + // get_group succeeded but classify_v2 didn't — shape mismatch. 
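+        // e.g. a 3-part random-access name: get_group sees the
+        // "random-access/" prefix, but classify_v2's RandomAccess arm
+        // only accepts 2- or 4-part names and returns None otherwise.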
+ return Outcome::Skip(Skip::UnsupportedShape); + }; + let derived = match &cls.group { + V2Group::Compression => { + let lc = cls.chart.to_lowercase(); + lc.contains("ratio") || lc.contains(':') + } + V2Group::CompressionSize => cls.chart.to_lowercase().contains(':'), + _ => false, + }; + if derived { + return Outcome::Skip(Skip::DerivedRatio); + } + let bin = match &cls.group { + V2Group::RandomAccess => match bin_random_access(record) { + Some(b) => Some(b), + // `bin_random_access` only returns None for malformed + // shapes (empty dataset/pattern segment, empty/`default` + // format). Route them to Skip so the `Outcome::Unknown` + // arm below — and the 5% uncategorized gate in + // `migrate::run` — don't trip on them. + None => return Outcome::Skip(Skip::UnsupportedShape), + }, + V2Group::Compression => bin_compression_time(&cls, record), + V2Group::CompressionSize => bin_compression_size(&cls, record), + V2Group::Query { .. } => bin_query(&cls, record), + }; + let Some(bin) = bin else { + return Outcome::Unknown; + }; + if !is_v3_dim(&bin) { + return Outcome::Skip(Skip::Deprecated); + } + if let V2Group::Query { suite_index, .. } = &group + && !V3_QUERY_SUITES.contains(&QUERY_SUITES[*suite_index].prefix) + { + return Outcome::Skip(Skip::Deprecated); + } + Outcome::Bin(bin) +} + +fn bin_random_access(record: &V2Record) -> Option { + // Pull dataset and format from the raw, pre-rename v2 name so v3 + // stores meaningful values. Two raw shapes are supported: + // + // - 4-part `random-access///-tokio-local-disk` + // - 2-part legacy `random-access/-tokio-local-disk` + // + // The 2-part shape is what `random-access-bench`'s `measurement_name` + // emits when called without an `AccessPattern`, and per its source + // comment that path is only taken for the legacy taxi run + // (`if dataset.name() == "taxi"` in `benchmarks/random-access-bench/ + // src/main.rs`). The live v3 emitter `random_access_record` writes + // `dataset="taxi"` for those same measurements, so the historical + // 2-part records are taxi too — assigning `dataset="taxi"` here + // recovers the time series instead of letting it disappear under + // v2's "RANDOM ACCESS" placeholder. Deriving from the raw name + // (rather than `cls.chart`) keeps this independent of v2's + // `normalizeChartName`. + // + // After stripping the `-tokio-local-disk` suffix, map the v2 + // random-access ext label (`vortex`, from `Format::ext()`) to the + // canonical name (`vortex-file-compressed`, from `Format::name()`). + // `parquet` and `lance` match between ext and name. The `vortex` + // ext is shared by both `OnDiskVortex` (name + // `vortex-file-compressed`) and `VortexCompact` (name + // `vortex-compact`), but v2's random-access bench only emitted + // `OnDiskVortex`, so mapping to `vortex-file-compressed` is + // correct for all historical data. + // + // Records whose `` segment ends in `-footer` (the bench's + // reopen-mode variant, e.g. `parquet-tokio-local-disk-footer`) + // intentionally do not strip clean to a v3-allowlisted format; the + // outer `is_v3_dim` filter then routes them to `Skip::Deprecated`. + // The live v3 emitter doesn't distinguish reopen vs cached either + // (`random_access_record` uses `format.name()` for both), so + // dropping `-footer` here keeps migration consistent with what + // v3 ingests live. 
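+    //
+    // Illustrative mappings (the access-pattern name is only an example):
+    //   "random-access/taxi/random/parquet-tokio-local-disk"
+    //       -> dataset "taxi/random", format "parquet"
+    //   "random-access/vortex-tokio-local-disk"
+    //       -> dataset "taxi",        format "vortex-file-compressed"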
+ let parts: Vec<&str> = record.name.split('/').collect(); + let (dataset, raw_format) = match parts.as_slice() { + [_, ds, pat, format] => { + if ds.is_empty() || pat.is_empty() { + return None; + } + (format!("{ds}/{pat}").to_lowercase(), *format) + } + [_, format] => ("taxi".to_string(), *format), + _ => return None, + }; + if raw_format.is_empty() || raw_format == "default" { + return None; + } + let stripped = raw_format + .strip_suffix("-tokio-local-disk") + .unwrap_or(raw_format); + let format = match stripped { + "vortex" => "vortex-file-compressed".to_string(), + other => other.to_lowercase(), + }; + Some(V3Bin::RandomAccess { dataset, format }) +} + +fn bin_compression_time(cls: &V2Classification, _record: &V2Record) -> Option { + // v2 compression chart names look like (after format_query): + // "COMPRESS TIME" [vortex/encode] + // "DECOMPRESS TIME" [vortex/decode] + // "PARQUET RS ZSTD COMPRESS TIME" [parquet/encode] + // "PARQUET RS ZSTD DECOMPRESS TIME" [parquet/decode] + // "LANCE COMPRESS TIME" [lance/encode] + // "LANCE DECOMPRESS TIME" [lance/decode] + // "VORTEX:LANCE RATIO COMPRESS TIME" [drop] + // "VORTEX:PARQUET-ZSTD RATIO COMPRESS TIME" [drop] + // "VORTEX:RAW RATIO COMPRESS TIME" [drop] + let lc = cls.chart.to_lowercase(); + if lc.contains("ratio") || lc.contains(':') { + // Ratios are computed at read time from compression_sizes. + return None; + } + let (format, op) = if lc.starts_with("compress time") { + ("vortex-file-compressed", "encode") + } else if lc.starts_with("decompress time") { + ("vortex-file-compressed", "decode") + } else if lc.starts_with("parquet rs zstd compress time") { + ("parquet", "encode") + } else if lc.starts_with("parquet rs zstd decompress time") { + ("parquet", "decode") + } else if lc.starts_with("lance compress time") { + ("lance", "encode") + } else if lc.starts_with("lance decompress time") { + ("lance", "decode") + } else { + return None; + }; + let dataset = cls.series.to_lowercase(); + if dataset.is_empty() || dataset == "default" { + return None; + } + Some(V3Bin::CompressionTime { + dataset, + dataset_variant: None, + format: format.to_string(), + op: op.to_string(), + }) +} + +fn bin_compression_size(cls: &V2Classification, record: &V2Record) -> Option { + let lc = cls.chart.to_lowercase(); + // Ratios like "VORTEX:PARQUET ZSTD SIZE" / "VORTEX:LANCE SIZE" / + // "VORTEX:RAW SIZE" are derived from compression_sizes at read + // time, not stored. + if lc.contains(':') { + return None; + } + // `parquet-zstd size` shares a leading "parquet" with `parquet size`, + // so check the more specific prefix first. `format_query` upper-cases + // and replaces `-`/`_` with spaces, so the chart we match against is + // `"PARQUET ZSTD SIZE"` (no hyphen) — same convention as the existing + // `"parquet rs zstd compress time"` branches above. + let format = if lc.starts_with("vortex size") { + "vortex-file-compressed" + } else if lc.starts_with("parquet zstd size") { + "parquet-zstd" + } else if lc.starts_with("parquet size") { + "parquet" + } else if lc.starts_with("lance size") { + "lance" + } else { + return None; + }; + let dataset = cls.series.to_lowercase(); + if dataset.is_empty() || dataset == "default" { + return None; + } + // Mirror the file-sizes ingest path's dataset_variant derivation + // (see `migrate::migrate_file_sizes`): pull the SF out of the v2 + // record's `dataset` object when present and run it through + // `canonical_scale_factor` so `"1"`, `"1.0"`, `"10"` and `"10.0"` + // collapse to one canonical form. 
Without this both code paths + // produce the same `mid` only by accident, so SF=10 file-sizes + // rows wouldn't merge with the matching data.json.gz + // "vortex size/tpch" rows when one side wrote `"10"` and the + // other wrote `"10.0"`. + let dataset_variant = crate::v2::canonical_scale_factor( + record + .dataset + .as_ref() + .and_then(|d| crate::v2::dataset_scale_factor(d, dataset.as_str())) + .as_deref(), + ); + Some(V3Bin::CompressionSize { + dataset, + dataset_variant, + format: format.to_string(), + }) +} + +fn bin_query(cls: &V2Classification, record: &V2Record) -> Option { + let V2Group::Query { + suite_index, + storage, + scale_factor, + } = &cls.group + else { + return None; + }; + let suite = &QUERY_SUITES[*suite_index]; + + // Pull the query index from the *raw* name's first part instead of + // the formatted chart, so we don't have to round-trip "Q07". + let raw_first = record.name.split('/').next().unwrap_or(""); + let query_idx = parse_query_index_from_first(raw_first)?; + + // Pull engine:format from the raw, pre-rename second segment so v3 + // stores canonical `Format::name()` strings (e.g. + // `vortex-file-compressed`) that match what the v3 live emitter + // writes. `cls.series` has been through v2's `ENGINE_RENAMES` for + // UI display and is not appropriate for v3 columns. + // + // Older v2 records emitted display-case engines (e.g. `DataFusion`, + // `DuckDB`); newer ones emit lowercase. Lowercase here so dedup + // collapses both spellings into a single canonical row. + let raw_series = record.name.split('/').nth(1)?; + let (engine, format) = split_engine_format(raw_series)?; + let engine = engine.to_lowercase(); + let format = format.to_lowercase(); + + let storage_v3 = match storage.as_deref() { + Some("S3") => "s3".to_string(), + Some("NVMe") => "nvme".to_string(), + _ => "nvme".to_string(), + }; + + // ClickBench's "flavor" lives in dataset_variant per benchmark-mapping.md + // - we don't have it from a v2 name string, so we leave it None. + Some(V3Bin::Query { + dataset: suite.prefix.to_string(), + dataset_variant: None, + scale_factor: scale_factor.clone(), + query_idx, + storage: storage_v3, + engine, + format, + }) +} + +/// Pull the integer query index out of the leading name part, which is +/// always `_q` or ` q` for SQL query records. +fn parse_query_index_from_first(first: &str) -> Option { + let lower = first.to_lowercase(); + for suite in QUERY_SUITES { + if let Some(rest) = lower.strip_prefix(suite.prefix) + && let Some(idx) = parse_query_index(rest) + { + return Some(idx as i32); + } + } + None +} + +/// Split a renamed series like `datafusion:parquet` into +/// `(engine, format)`. Returns `None` for series with no `:` since +/// v3 requires both columns. 
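+///
+/// A minimal illustration (marked `ignore` because the function is
+/// private to this crate, so the calls are not compiled as a doctest):
+///
+/// ```ignore
+/// assert_eq!(
+///     split_engine_format("datafusion:parquet"),
+///     Some(("datafusion".to_string(), "parquet".to_string()))
+/// );
+/// assert_eq!(split_engine_format("vortex"), None); // no ':' separator
+/// ```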
+fn split_engine_format(series: &str) -> Option<(String, String)> { + let mut split = series.splitn(2, ':'); + let engine = split.next()?.trim().to_string(); + let format = split.next()?.trim().to_string(); + if engine.is_empty() || format.is_empty() { + return None; + } + Some((engine, format)) +} + +#[cfg(test)] +mod tests { + use anyhow::Context as _; + + use super::*; + + fn record(name: &str) -> V2Record { + V2Record { + name: name.to_string(), + commit_id: Some("deadbeef".into()), + unit: None, + value: None, + storage: None, + dataset: None, + all_runtimes: None, + env_triple: None, + } + } + + #[test] + fn format_query_round_trips() { + assert_eq!(format_query("clickbench_q07"), "CLICKBENCH Q7"); + assert_eq!(format_query("tpch_q01"), "TPC-H Q1"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("statpopgen_q3"), "STATPOPGEN Q3"); + assert_eq!(format_query("foo bar"), "FOO BAR"); + } + + #[test] + fn rename_engine_canonicalizes_disk_names() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("unknown-engine"), "unknown-engine"); + } + + #[test] + fn parse_query_index_handles_separators() { + assert_eq!(parse_query_index("_q07"), Some(7)); + assert_eq!(parse_query_index(" q7"), Some(7)); + assert_eq!(parse_query_index("q42"), Some(42)); + assert_eq!(parse_query_index("xq7"), None); + } + + #[test] + fn random_access_bins_dataset_pattern() -> anyhow::Result<()> { + let bin = classify(&record("random-access/taxi/take/parquet")) + .context("classify returned None for a known-good 4-part random-access name")?; + assert_eq!( + bin, + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + } + ); + Ok(()) + } +} diff --git a/benchmarks-website/migrate/src/commits.rs b/benchmarks-website/migrate/src/commits.rs new file mode 100644 index 00000000000..87c53caa41b --- /dev/null +++ b/benchmarks-website/migrate/src/commits.rs @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Commit upserts. Adapts a [`crate::v2::V2Commit`] into the v3 +//! `commits` row shape (a [`vortex_bench_server::records::CommitInfo`]). + +use anyhow::Context as _; +use anyhow::Result; +use duckdb::Transaction; +use duckdb::params; + +use crate::v2::V2Commit; + +/// Insert a v3 `commits` row for one v2 commit. `tree_sha` and `url` +/// remain required and use a warning-bearing empty-string fallback; +/// the human-input fields (message, author/committer name and email) +/// are nullable in the v3 schema, so empty / missing values map to +/// SQL `NULL` instead of an empty string the UI would render as a +/// blank cell. 
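+///
+/// For example, a v2 commit carrying `message: Some("")` and no
+/// `tree_id` is stored with `message = NULL`, `tree_sha = ''`, and one
+/// warning in the returned [`UpsertOutcome`].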
+pub fn upsert_commit(tx: &Transaction<'_>, commit: &V2Commit) -> Result { + let mut warnings = Vec::new(); + let timestamp = require_field(&commit.timestamp, "timestamp", &commit.id, &mut warnings); + let message = optional_field(&commit.message); + let author_name = optional_field(&commit.author.as_ref().and_then(|p| p.name.clone())); + let author_email = optional_field(&commit.author.as_ref().and_then(|p| p.email.clone())); + let committer_name = optional_field(&commit.committer.as_ref().and_then(|p| p.name.clone())); + let committer_email = optional_field(&commit.committer.as_ref().and_then(|p| p.email.clone())); + let tree_sha = require_field(&commit.tree_id, "tree_id", &commit.id, &mut warnings); + let url = require_field(&commit.url, "url", &commit.id, &mut warnings); + + tx.execute( + r#" + INSERT INTO commits ( + commit_sha, timestamp, message, author_name, author_email, + committer_name, committer_email, tree_sha, url + ) VALUES (?, CAST(? AS TIMESTAMPTZ), ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (commit_sha) DO UPDATE SET + timestamp = excluded.timestamp, + message = excluded.message, + author_name = excluded.author_name, + author_email = excluded.author_email, + committer_name = excluded.committer_name, + committer_email = excluded.committer_email, + tree_sha = excluded.tree_sha, + url = excluded.url + "#, + params![ + commit.id, + timestamp, + message, + author_name, + author_email, + committer_name, + committer_email, + tree_sha, + url, + ], + ) + .with_context(|| format!("upserting commit {}", commit.id))?; + Ok(UpsertOutcome { warnings }) +} + +fn require_field( + field: &Option, + name: &str, + sha: &str, + warnings: &mut Vec, +) -> String { + match field { + Some(s) => s.clone(), + None => { + warnings.push(format!("commit {sha} missing {name}")); + String::new() + } + } +} + +/// Coerce a v2-supplied `Option` into a SQL-bindable +/// `Option`, treating an empty / whitespace-only value as +/// missing. v2 sometimes wrote `""` for blank author / committer / +/// message fields; storing those as actual `NULL` lets the UI +/// distinguish "missing metadata" from "deliberately blank". +fn optional_field(field: &Option) -> Option { + field + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) +} + +/// Per-call warning bag returned to the caller for logging. +#[derive(Debug, Default)] +pub struct UpsertOutcome { + pub warnings: Vec, +} diff --git a/benchmarks-website/migrate/src/lib.rs b/benchmarks-website/migrate/src/lib.rs new file mode 100644 index 00000000000..b5aa72bc97d --- /dev/null +++ b/benchmarks-website/migrate/src/lib.rs @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! One-shot historical migrator from v2's S3-hosted benchmark dataset +//! to a v3 DuckDB file. +//! +//! The v2 dataset is JSONL of bare benchmark records keyed by name string. +//! v3 uses five typed fact tables with explicit dim columns. This crate +//! ports v2's `getGroup` classifier (in `benchmarks-website/server.js`) +//! bug-for-bug so that historical rows survive the migration with the +//! same group / chart / series structure as the live v2 server. +//! +//! The migrator is throwaway: once v3 cuts over, both the binary and +//! the classifier go away. + +/// Routing v2 records into v3 fact tables, ported from v2's `getGroup`. +pub mod classifier; +/// V2 commit -> v3 `commits` row upserts. +pub mod commits; +/// End-to-end migration of v2 dumps into a v3 DuckDB. 
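+/// (`migrate::run` streams the dumps and returns a [`MigrationSummary`].)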
+pub mod migrate; +/// Streaming readers for the v2 S3 bucket and local dumps. +pub mod source; +/// Wire shapes of the v2 benchmark dataset. +pub mod v2; +/// Structural diff between a migrated v3 DuckDB and v2's `/api/metadata`. +pub mod verify; diff --git a/benchmarks-website/migrate/src/main.rs b/benchmarks-website/migrate/src/main.rs new file mode 100644 index 00000000000..366834ed441 --- /dev/null +++ b/benchmarks-website/migrate/src/main.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! `vortex-bench-migrate` CLI: a one-shot historical migrator from +//! v2's S3 dataset into a v3 DuckDB file, plus a structural diff +//! against the live v2 `/api/metadata` endpoint for spotting +//! classifier regressions. + +use std::path::PathBuf; +use std::process::ExitCode; + +use anyhow::Context as _; +use anyhow::Result; +use clap::Parser; +use clap::Subcommand; +use clap::ValueEnum; +use tracing_subscriber::EnvFilter; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; +use vortex_bench_migrate::verify; + +/// One-shot historical migrator from v2's S3 dataset to v3 DuckDB. +#[derive(Debug, Parser)] +#[command(name = "vortex-bench-migrate", version, about)] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// Read v2's data.json.gz / commits.json / file-sizes-*.json.gz + /// and write a fully populated v3 DuckDB at `--output`. + Run { + /// Path to write the v3 DuckDB to. Created if absent. + #[arg(long)] + output: PathBuf, + /// Where to fetch v2 dumps from. + #[arg(long, value_enum, default_value_t = SourceKind::PublicS3)] + source: SourceKind, + /// For `--source=local`, the directory containing + /// `data.json.gz`, `commits.json`, and `file-sizes-*.json.gz`. + #[arg(long, required_if_eq("source", "local"))] + source_dir: Option, + }, + /// Diff a migrated DuckDB against the live v2 `/api/metadata` + /// endpoint. Exits 0 if every v2 group is present in v3, 1 + /// otherwise so this can gate a CI step. + Verify { + /// HTTPS root of a running v2 server (e.g. `https://bench.vortex.dev`). + #[arg(long)] + against: String, + /// Path to the migrated v3 DuckDB. + #[arg(long)] + duckdb: PathBuf, + }, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum SourceKind { + PublicS3, + Local, +} + +fn main() -> ExitCode { + if let Err(err) = run() { + eprintln!("error: {err:#}"); + return ExitCode::from(2); + } + ExitCode::SUCCESS +} + +fn run() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_env("VORTEX_BENCH_LOG").unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + match cli.command { + Command::Run { + output, + source, + source_dir, + } => { + let source = match source { + SourceKind::PublicS3 => Source::PublicS3, + SourceKind::Local => { + Source::Local(source_dir.context("--source=local requires --source-dir")?) 
+ } + }; + let summary = migrate::run(&source, &output)?; + print!("{summary}"); + if summary.uncategorized_fraction() > 0.05 { + anyhow::bail!( + "uncategorized records ({:.2}%) exceed the 5% gate; \ + stop and report unmatched prefixes (see summary above) \ + before proceeding", + 100.0 * summary.uncategorized_fraction() + ); + } + Ok(()) + } + Command::Verify { against, duckdb } => { + let report = verify::run(&against, &duckdb)?; + print!("{report}"); + if !report.v2_groups_covered() { + std::process::exit(1); + } + Ok(()) + } + } +} diff --git a/benchmarks-website/migrate/src/migrate.rs b/benchmarks-website/migrate/src/migrate.rs new file mode 100644 index 00000000000..167c793c581 --- /dev/null +++ b/benchmarks-website/migrate/src/migrate.rs @@ -0,0 +1,936 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! End-to-end migration of one v2 dataset into a v3 DuckDB file. +//! +//! Streams `data.json.gz` line-by-line, runs each record through the +//! [`classifier`], and writes one row per record into the appropriate v3 fact table. +//! Every row's `measurement_id` is computed via the server's `measurement_id_*` functions so the +//! result is byte-compatible with what fresh `/api/ingest` would have produced. +//! +//! Bulk-load shape: rows are accumulated in memory as parallel column +//! vectors, deduplicated by `measurement_id`, then flushed to DuckDB +//! via `Appender::append_record_batch` as one Arrow `RecordBatch` per +//! fact table. + +use std::collections::BTreeMap; +use std::io::BufRead; +use std::path::Path; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use anyhow::Context as _; +use anyhow::Result; +use arrow_array::ArrayRef; +use arrow_array::Int32Array; +use arrow_array::Int64Array; +use arrow_array::ListArray; +use arrow_array::RecordBatch; +use arrow_array::StringArray; +use arrow_buffer::OffsetBuffer; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::Schema; +use duckdb::Connection; +use tracing::info; +use tracing::warn; +use vortex_bench_server::db::measurement_id_compression_size; +use vortex_bench_server::db::measurement_id_compression_time; +use vortex_bench_server::db::measurement_id_query; +use vortex_bench_server::db::measurement_id_random_access; +use vortex_bench_server::records::CompressionSize; +use vortex_bench_server::records::CompressionTime; +use vortex_bench_server::records::QueryMeasurement; +use vortex_bench_server::records::RandomAccessTime; +use vortex_bench_server::schema::SCHEMA_DDL; +use vortex_utils::aliases::hash_map::HashMap; + +use crate::classifier; +use crate::classifier::V3Bin; +use crate::commits::upsert_commit; +use crate::source::KNOWN_FILE_SIZES_SUITES; +use crate::source::Source; +use crate::v2::V2Commit; +use crate::v2::V2FileSize; +use crate::v2::V2Record; +use crate::v2::canonical_scale_factor; +use crate::v2::index_commits; +use crate::v2::runtime_as_i64; +use crate::v2::value_as_f64; + +/// Per-table insert counts, plus skip / missing counts. 
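+///
+/// The CLI gates on [`MigrationSummary::uncategorized_fraction`]: for
+/// example, 600 uncategorized records out of 10,000 read is 6% and
+/// fails the run, while 400 (4%) passes the 5% threshold.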
+#[derive(Debug, Default, Clone)] +pub struct MigrationSummary { + pub records_read: u64, + pub query_inserted: u64, + pub compression_time_inserted: u64, + pub compression_size_inserted: u64, + pub random_access_inserted: u64, + pub file_size_inserted: u64, + pub uncategorized: u64, + pub uncategorized_prefixes: BTreeMap, + pub missing_commit: u64, + pub commit_warnings: u64, + pub skipped_no_value: u64, + pub skipped_intentional: u64, + pub commits_inserted: u64, + pub deduped: u64, + /// Number of records dropped by dedup whose `value_ns` (or + /// `value_bytes` for compression_sizes' replace path) differed + /// from the kept row's. Non-zero is a smell worth investigating. + pub deduped_with_conflict: u64, +} + +impl MigrationSummary { + /// Total `data.json.gz` records that landed in some v3 fact table. + pub fn total_inserted(&self) -> u64 { + self.query_inserted + + self.compression_time_inserted + + self.compression_size_inserted + + self.random_access_inserted + } + + /// Fraction of records that were uncategorized. The orchestrator + /// stops if this exceeds the documented 5% threshold. + pub fn uncategorized_fraction(&self) -> f64 { + if self.records_read == 0 { + return 0.0; + } + self.uncategorized as f64 / self.records_read as f64 + } +} + +/// Open or create a DuckDB at `path` and apply the v3 schema. The +/// migrator is a one-shot fresh load; the bulk-append flush is pure +/// insert (no `ON CONFLICT`), so any stale rows in `path` would clash +/// with the next run on the same primary keys. Delete both the +/// database file and its WAL companion up front so every run starts +/// from a known-empty state. +pub fn open_target_db(path: &Path) -> Result { + remove_if_exists(path)?; + let wal = wal_path(path); + remove_if_exists(&wal)?; + let conn = + Connection::open(path).with_context(|| format!("opening DuckDB at {}", path.display()))?; + conn.execute_batch(SCHEMA_DDL) + .context("applying v3 schema DDL")?; + Ok(conn) +} + +fn remove_if_exists(path: &Path) -> Result<()> { + match std::fs::remove_file(path) { + Ok(()) => { + info!(path = %path.display(), "removed pre-existing target file"); + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e).with_context(|| format!("removing {}", path.display())), + } +} + +/// DuckDB writes its write-ahead log next to the database file with a +/// `.wal` suffix appended (e.g. `v3.duckdb` -> `v3.duckdb.wal`). +fn wal_path(path: &Path) -> std::path::PathBuf { + let mut name = path.as_os_str().to_owned(); + name.push(".wal"); + std::path::PathBuf::from(name) +} + +/// Run the whole migration: commits, data.json.gz, and every +/// file-sizes-*.json.gz under the source. +pub fn run(source: &Source, target: &Path) -> Result { + let mut conn = open_target_db(target)?; + let mut summary = MigrationSummary::default(); + + info!(source = %source.describe(), "Reading commits.json"); + let commits = read_commits(source)?; + info!(commits = commits.len(), "Loaded commits"); + summary.commits_inserted = upsert_all_commits(&mut conn, &commits, &mut summary)?; + + let mut q = QueryAccum::default(); + let mut ct = CompressionTimeAccum::default(); + let mut cs = CompressionSizeAccum::default(); + let mut ra = RandomAccessAccum::default(); + + info!("Migrating data.json.gz"); + migrate_data_jsonl( + source, + &commits, + &mut summary, + &mut q, + &mut ct, + &mut cs, + &mut ra, + )?; + info!(records = summary.records_read, "data.json.gz done"); + + for name in source.list_file_sizes()? 
{ + info!(name = %name, "Migrating file-sizes"); + if let Err(e) = migrate_file_sizes(source, &name, &commits, &mut summary, &mut cs) { + warn!("file-sizes file {name} failed: {e:#}"); + } + } + + info!("Flushing accumulators to DuckDB"); + flush_all(&conn, q, ct, ra, cs, &mut summary)?; + + Ok(summary) +} + +/// Flush each accumulator's batch and bump the matching per-fact +/// summary counter only AFTER the flush succeeds. This way a flush +/// failure leaves the counter at zero (or its previous value) rather +/// than reporting rows that never landed in DuckDB. +fn flush_all( + conn: &Connection, + q: QueryAccum, + ct: CompressionTimeAccum, + ra: RandomAccessAccum, + cs: CompressionSizeAccum, + summary: &mut MigrationSummary, +) -> Result<()> { + let batch = build_query_batch(q)?; + let n = batch.num_rows() as u64; + flush(conn, "query_measurements", batch)?; + summary.query_inserted = n; + + let batch = build_compression_time_batch(ct)?; + let n = batch.num_rows() as u64; + flush(conn, "compression_times", batch)?; + summary.compression_time_inserted = n; + + let batch = build_random_access_batch(ra)?; + let n = batch.num_rows() as u64; + flush(conn, "random_access_times", batch)?; + summary.random_access_inserted = n; + + let batch = build_compression_size_batch(cs)?; + let n = batch.num_rows() as u64; + flush(conn, "compression_sizes", batch)?; + summary.compression_size_inserted = n; + + Ok(()) +} + +fn read_commits(source: &Source) -> Result> { + let reader = source.open_commits_jsonl()?; + let mut commits: Vec = Vec::new(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + match serde_json::from_str::(trimmed) { + Ok(c) => commits.push(c), + Err(e) => warn!("skipping malformed commits.json line: {e}"), + } + } + Ok(index_commits(commits)) +} + +fn upsert_all_commits( + conn: &mut Connection, + commits: &BTreeMap, + summary: &mut MigrationSummary, +) -> Result { + let tx = conn.transaction().context("begin commits transaction")?; + let mut count = 0u64; + for commit in commits.values() { + let outcome = upsert_commit(&tx, commit)?; + for w in outcome.warnings { + warn!("{w}"); + summary.commit_warnings += 1; + } + count += 1; + } + tx.commit().context("commit commits transaction")?; + Ok(count) +} + +/// Stream `data.json.gz` and push classified records into the +/// per-table accumulators. Dedup happens inside each accumulator's +/// `push` method by `measurement_id`. 
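+///
+/// Each input line is one [`V2Record`]; a query-shaped line looks
+/// roughly like `{"name":"clickbench_q07/datafusion:parquet",
+/// "commit_id":"deadbeef","value":123456789}` (values illustrative;
+/// see `crate::v2` for the full field set).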
+fn migrate_data_jsonl( + source: &Source, + commits: &BTreeMap, + summary: &mut MigrationSummary, + q: &mut QueryAccum, + ct: &mut CompressionTimeAccum, + cs: &mut CompressionSizeAccum, + ra: &mut RandomAccessAccum, +) -> Result<()> { + let reader = source.open_data_jsonl()?; + let started = Instant::now(); + let mut last_log = Instant::now(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + summary.records_read += 1; + let record: V2Record = match serde_json::from_str(trimmed) { + Ok(r) => r, + Err(e) => { + warn!("skipping malformed data.json line: {e}"); + continue; + } + }; + apply_v2_record(&record, commits, summary, q, ct, cs, ra); + if last_log.elapsed() >= Duration::from_secs(5) { + let elapsed = started.elapsed().as_secs_f64(); + let rate = summary.records_read as f64 / elapsed.max(0.001); + info!( + records = summary.records_read, + rate = format!("{rate:.0}/s"), + query = q.measurement_id.len(), + compression_time = ct.measurement_id.len(), + compression_size = cs.rows.len(), + random_access = ra.measurement_id.len(), + "migration progress", + ); + last_log = Instant::now(); + } + } + Ok(()) +} + +fn apply_v2_record( + record: &V2Record, + commits: &BTreeMap, + summary: &mut MigrationSummary, + q: &mut QueryAccum, + ct: &mut CompressionTimeAccum, + cs: &mut CompressionSizeAccum, + ra: &mut RandomAccessAccum, +) { + let Some(sha) = record.commit_id.clone() else { + summary.missing_commit += 1; + return; + }; + if !commits.contains_key(&sha) { + summary.missing_commit += 1; + return; + } + + let bin = match classifier::classify_outcome(record) { + classifier::Outcome::Bin(b) => b, + classifier::Outcome::Skip(_) => { + summary.skipped_intentional += 1; + return; + } + classifier::Outcome::Unknown => { + summary.uncategorized += 1; + let prefix = record.name.split('/').next().unwrap_or("").to_string(); + *summary.uncategorized_prefixes.entry(prefix).or_insert(0) += 1; + return; + } + }; + + let env_triple = record.env_triple.as_ref().and_then(|t| t.to_triple()); + let runtimes = record + .all_runtimes + .as_ref() + .map(|v| v.iter().filter_map(runtime_as_i64).collect::>()) + .unwrap_or_default(); + let value_f64 = match record.value.as_ref().and_then(value_as_f64) { + Some(v) => v, + None => { + summary.skipped_no_value += 1; + return; + } + }; + + match bin { + V3Bin::Query { + dataset, + dataset_variant, + scale_factor, + query_idx, + storage, + engine, + format, + } => { + let qm = QueryMeasurement { + commit_sha: sha, + dataset, + dataset_variant, + scale_factor, + query_idx, + storage, + engine, + format, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + peak_physical: None, + peak_virtual: None, + physical_delta: None, + virtual_delta: None, + env_triple, + }; + let mid = measurement_id_query(&qm); + q.push(mid, qm, summary); + } + V3Bin::CompressionTime { + dataset, + dataset_variant, + format, + op, + } => { + let ctr = CompressionTime { + commit_sha: sha, + dataset, + dataset_variant, + format, + op, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + env_triple, + }; + let mid = measurement_id_compression_time(&ctr); + ct.push(mid, ctr, summary); + } + V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + } => { + let csr = CompressionSize { + commit_sha: sha, + dataset, + dataset_variant, + format, + value_bytes: value_f64 as i64, + }; + let mid = measurement_id_compression_size(&csr); + cs.push_replace(mid, csr, summary); + } + V3Bin::RandomAccess { dataset, 
format } => { + let rar = RandomAccessTime { + commit_sha: sha, + dataset, + format, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + env_triple, + }; + let mid = measurement_id_random_access(&rar); + ra.push(mid, rar, summary); + } + } +} + +fn migrate_file_sizes( + source: &Source, + name: &str, + commits: &BTreeMap, + summary: &mut MigrationSummary, + cs: &mut CompressionSizeAccum, +) -> Result<()> { + let reader = source.open_file_sizes(name)?; + // Prefix unknown-id fallbacks with `unknown:` so they're clearly + // labeled in the UI rather than masquerading as a dataset name. + let dataset_fallback = { + let stripped = name + .strip_prefix("file-sizes-") + .and_then(|s| s.strip_suffix(".json.gz")) + .unwrap_or(name); + if KNOWN_FILE_SIZES_SUITES.contains(&stripped) { + stripped.to_string() + } else { + format!("unknown:{stripped}") + } + }; + let started = Instant::now(); + let mut last_log = Instant::now(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let sz: V2FileSize = match serde_json::from_str(trimmed) { + Ok(r) => r, + Err(e) => { + warn!("skipping malformed {name} line: {e}"); + continue; + } + }; + if !commits.contains_key(&sz.commit_id) { + summary.missing_commit += 1; + continue; + } + let dataset = if sz.benchmark.is_empty() { + dataset_fallback.clone() + } else { + sz.benchmark.clone() + }; + // Run SF through canonical_scale_factor so `"1"`, `"1.0"`, `"10"` + // and `"10.0"` collapse to one form, matching what + // `bin_compression_size` writes for the data.json.gz path. + let dataset_variant = canonical_scale_factor(sz.scale_factor.as_deref()); + let csr = CompressionSize { + commit_sha: sz.commit_id.clone(), + dataset, + dataset_variant, + format: sz.format.clone(), + value_bytes: sz.size_bytes, + }; + let mid = measurement_id_compression_size(&csr); + cs.push_sum(mid, csr); + summary.file_size_inserted += 1; + if last_log.elapsed() >= Duration::from_secs(5) { + let elapsed = started.elapsed().as_secs_f64(); + let rate = summary.file_size_inserted as f64 / elapsed.max(0.001); + info!( + name = %name, + file_sizes = summary.file_size_inserted, + rate = format!("{rate:.0}/s"), + "file-sizes progress", + ); + last_log = Instant::now(); + } + } + Ok(()) +} + +/// Append an Arrow `RecordBatch` to a DuckDB table via `Appender`. +fn flush(conn: &Connection, table: &str, batch: RecordBatch) -> Result<()> { + let mut app = conn + .appender(table) + .with_context(|| format!("opening appender for {table}"))?; + app.append_record_batch(batch) + .with_context(|| format!("appending record batch to {table}"))?; + drop(app); + Ok(()) +} + +#[derive(Default)] +struct QueryAccum { + measurement_id: Vec, + commit_sha: Vec, + dataset: Vec, + dataset_variant: Vec>, + scale_factor: Vec>, + query_idx: Vec, + storage: Vec, + engine: Vec, + format: Vec, + value_ns: Vec, + all_runtimes_ns: Vec>, + peak_physical: Vec>, + peak_virtual: Vec>, + physical_delta: Vec>, + virtual_delta: Vec>, + env_triple: Vec>, + /// `mid` -> index in the parallel column vecs. Lets us look up the + /// kept row's `value_ns` on collision so we can flag conflicts. 
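+    ///
+    /// For example, if the same `mid` arrives twice with `value_ns`
+    /// 100 and then 120, the second row is dropped, `deduped` is
+    /// bumped, and the 100-vs-120 mismatch also bumps
+    /// `deduped_with_conflict`.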
+ seen: HashMap, +} + +impl QueryAccum { + fn push(&mut self, mid: i64, r: QueryMeasurement, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.dataset_variant.push(r.dataset_variant); + self.scale_factor.push(r.scale_factor); + self.query_idx.push(r.query_idx); + self.storage.push(r.storage); + self.engine.push(r.engine); + self.format.push(r.format); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.peak_physical.push(r.peak_physical); + self.peak_virtual.push(r.peak_virtual); + self.physical_delta.push(r.physical_delta); + self.virtual_delta.push(r.virtual_delta); + self.env_triple.push(r.env_triple); + } +} + +#[derive(Default)] +struct CompressionTimeAccum { + measurement_id: Vec, + commit_sha: Vec, + dataset: Vec, + dataset_variant: Vec>, + format: Vec, + op: Vec, + value_ns: Vec, + all_runtimes_ns: Vec>, + env_triple: Vec>, + seen: HashMap, +} + +impl CompressionTimeAccum { + fn push(&mut self, mid: i64, r: CompressionTime, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.dataset_variant.push(r.dataset_variant); + self.format.push(r.format); + self.op.push(r.op); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.env_triple.push(r.env_triple); + } +} + +#[derive(Default)] +struct RandomAccessAccum { + measurement_id: Vec, + commit_sha: Vec, + dataset: Vec, + format: Vec, + value_ns: Vec, + all_runtimes_ns: Vec>, + env_triple: Vec>, + seen: HashMap, +} + +impl RandomAccessAccum { + fn push(&mut self, mid: i64, r: RandomAccessTime, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.format.push(r.format); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.env_triple.push(r.env_triple); + } +} + +/// `compression_sizes` is fed by both data.json.gz (replace-on-collision) +/// and file-sizes-*.json.gz (sum-on-collision). Stored as a map; converted +/// to a `RecordBatch` at flush time. +#[derive(Default)] +struct CompressionSizeAccum { + rows: HashMap, +} + +impl CompressionSizeAccum { + /// data.json.gz path: latest write wins, mirroring the prior + /// `ON CONFLICT DO UPDATE SET value_bytes = excluded.value_bytes`. + /// Bumps `deduped_with_conflict` when an existing row's + /// `value_bytes` differs from the incoming row's, so silent + /// value-corruption is observable. 
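+    ///
+    /// E.g. two data.json.gz rows landing on the same `mid` with
+    /// `value_bytes` 1_000 and then 1_024 keep the later 1_024 and
+    /// bump `deduped_with_conflict` once.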
+ fn push_replace(&mut self, mid: i64, r: CompressionSize, summary: &mut MigrationSummary) { + if let Some(existing) = self.rows.get(&mid) + && existing.value_bytes != r.value_bytes + { + summary.deduped_with_conflict += 1; + } + self.rows.insert(mid, r); + } + + /// file-sizes-*.json.gz path: per-file rows aggregate into one + /// `(commit, dataset, dataset_variant, format)` row by summing, + /// mirroring the prior `value_bytes = compression_sizes.value_bytes + /// + excluded.value_bytes`. + fn push_sum(&mut self, mid: i64, r: CompressionSize) { + let add = r.value_bytes; + self.rows + .entry(mid) + .and_modify(|x| x.value_bytes += add) + .or_insert(r); + } +} + +fn build_query_batch(a: QueryAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("scale_factor", DataType::Utf8, true), + Field::new("query_idx", DataType::Int32, false), + Field::new("storage", DataType::Utf8, false), + Field::new("engine", DataType::Utf8, false), + Field::new("format", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("peak_physical", DataType::Int64, true), + Field::new("peak_virtual", DataType::Int64, true), + Field::new("physical_delta", DataType::Int64, true), + Field::new("virtual_delta", DataType::Int64, true), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.dataset_variant)), + Arc::new(StringArray::from(a.scale_factor)), + Arc::new(Int32Array::from(a.query_idx)), + Arc::new(StringArray::from(a.storage)), + Arc::new(StringArray::from(a.engine)), + Arc::new(StringArray::from(a.format)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(Int64Array::from(a.peak_physical)), + Arc::new(Int64Array::from(a.peak_virtual)), + Arc::new(Int64Array::from(a.physical_delta)), + Arc::new(Int64Array::from(a.virtual_delta)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) 
+} + +fn build_compression_time_batch(a: CompressionTimeAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("format", DataType::Utf8, false), + Field::new("op", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.dataset_variant)), + Arc::new(StringArray::from(a.format)), + Arc::new(StringArray::from(a.op)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +fn build_random_access_batch(a: RandomAccessAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("format", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.format)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +fn build_compression_size_batch(a: CompressionSizeAccum) -> Result { + let n = a.rows.len(); + let mut measurement_id = Vec::with_capacity(n); + let mut commit_sha = Vec::with_capacity(n); + let mut dataset = Vec::with_capacity(n); + let mut dataset_variant = Vec::with_capacity(n); + let mut format = Vec::with_capacity(n); + let mut value_bytes = Vec::with_capacity(n); + for (mid, cs) in a.rows { + measurement_id.push(mid); + commit_sha.push(cs.commit_sha); + dataset.push(cs.dataset); + dataset_variant.push(cs.dataset_variant); + format.push(cs.format); + value_bytes.push(cs.value_bytes); + } + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("format", DataType::Utf8, false), + Field::new("value_bytes", DataType::Int64, false), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(measurement_id)), + Arc::new(StringArray::from(commit_sha)), + Arc::new(StringArray::from(dataset)), + Arc::new(StringArray::from(dataset_variant)), + Arc::new(StringArray::from(format)), + Arc::new(Int64Array::from(value_bytes)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +/// Build a non-nullable `List` Arrow array from one inner Vec +/// per row. The outer list is non-null; inner i64 values are non-null. 
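+///
+/// For example, input rows `[[1, 2], [], [3]]` produce the flat child
+/// array `[1, 2, 3]` with offsets `[0, 2, 2, 3]`.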
+fn build_list_int64(values: Vec>) -> ListArray { + let mut offsets: Vec = Vec::with_capacity(values.len() + 1); + offsets.push(0); + let mut flat: Vec = Vec::new(); + for inner in values { + flat.extend_from_slice(&inner); + offsets.push(flat.len() as i32); + } + let values_arr = Int64Array::from(flat); + let field = Arc::new(Field::new("item", DataType::Int64, false)); + ListArray::new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values_arr), + None, + ) +} + +/// Print the summary in a human-readable form. Returned by the CLI. +impl std::fmt::Display for MigrationSummary { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Records read: {}", self.records_read)?; + writeln!(f, "Commits upserted: {}", self.commits_inserted)?; + writeln!(f, "Commit warnings: {}", self.commit_warnings)?; + writeln!(f, "Inserted (query): {}", self.query_inserted)?; + writeln!( + f, + "Inserted (compress t): {}", + self.compression_time_inserted + )?; + writeln!( + f, + "Inserted (compress s): {}", + self.compression_size_inserted + )?; + writeln!(f, "Inserted (random acc): {}", self.random_access_inserted)?; + writeln!(f, "Inserted (file sizes): {}", self.file_size_inserted)?; + writeln!(f, "Missing commit: {}", self.missing_commit)?; + writeln!(f, "Skipped (no value): {}", self.skipped_no_value)?; + writeln!(f, "Skipped (intentional): {}", self.skipped_intentional)?; + writeln!(f, "Deduplicated: {}", self.deduped)?; + writeln!(f, "Dedup w/ value diff: {}", self.deduped_with_conflict)?; + writeln!( + f, + "Uncategorized: {} ({:.2}%)", + self.uncategorized, + 100.0 * self.uncategorized_fraction() + )?; + if !self.uncategorized_prefixes.is_empty() { + let mut top: Vec<_> = self.uncategorized_prefixes.iter().collect(); + top.sort_by(|a, b| b.1.cmp(a.1)); + writeln!(f, "Top uncategorized prefixes:")?; + for (prefix, n) in top.iter().take(20) { + writeln!(f, " {prefix:>32} : {n}")?; + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use vortex_bench_server::records::QueryMeasurement; + + use super::*; + + fn open_db_without(table: &str) -> Result<(tempfile::TempDir, Connection)> { + let dir = tempfile::TempDir::new()?; + let path = dir.path().join("v3.duckdb"); + let conn = open_target_db(&path)?; + conn.execute_batch(&format!("DROP TABLE {table}"))?; + Ok((dir, conn)) + } + + fn one_query_row() -> QueryMeasurement { + QueryMeasurement { + commit_sha: "deadbeef".into(), + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 7, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + value_ns: 100, + all_runtimes_ns: vec![100], + peak_physical: None, + peak_virtual: None, + physical_delta: None, + virtual_delta: None, + env_triple: None, + } + } + + #[test] + fn flush_all_does_not_overcount_on_failure() -> Result<()> { + // Drop `compression_times` before flushing so the second + // flush in `flush_all` fails. The first (queries) succeeded, + // so its counter must be set; the failed table's counter and + // every later table's counter must stay at zero. 
+ let (_dir, conn) = open_db_without("compression_times")?; + + let mut summary = MigrationSummary::default(); + let mut q = QueryAccum::default(); + let qm = one_query_row(); + let mid = vortex_bench_server::db::measurement_id_query(&qm); + q.push(mid, qm, &mut summary); + + let ct = CompressionTimeAccum::default(); + let ra = RandomAccessAccum::default(); + let cs = CompressionSizeAccum::default(); + + let result = flush_all(&conn, q, ct, ra, cs, &mut summary); + assert!(result.is_err(), "expected flush to fail on missing table"); + + assert_eq!( + summary.query_inserted, 1, + "query flushed before the failure must be counted" + ); + assert_eq!( + summary.compression_time_inserted, 0, + "failed flush must not bump the counter" + ); + assert_eq!(summary.random_access_inserted, 0, "later flushes never ran"); + assert_eq!( + summary.compression_size_inserted, 0, + "later flushes never ran" + ); + Ok(()) + } +} diff --git a/benchmarks-website/migrate/src/source.rs b/benchmarks-website/migrate/src/source.rs new file mode 100644 index 00000000000..d0c059569dd --- /dev/null +++ b/benchmarks-website/migrate/src/source.rs @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Streaming readers for v2's public S3 bucket. +//! +//! The bucket is `--no-sign-request`, so we fetch the underlying +//! HTTPS URL directly and stream-decompress with `flate2`. The +//! downloads are wrapped in [`reqwest::blocking`] to keep the read +//! path synchronous; the binary's hot path is single-threaded +//! per-source already (DuckDB is a single-writer). +//! +//! For tests and offline runs, [`Source::Local`] accepts a local +//! directory of dumps; the migrator's `--source` flag picks the +//! variant. + +use std::fs::File; +use std::io::BufRead; +use std::io::BufReader; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context as _; +use anyhow::Result; +use flate2::read::GzDecoder; +use tracing::info; + +/// Public S3 bucket the live v2 server reads from. +pub const PUBLIC_BUCKET_BASE: &str = "https://vortex-ci-benchmark-results.s3.amazonaws.com"; + +/// Where to read the v2 dataset from. Either the public S3 bucket +/// (the live deployment) or a local directory of dumps. +#[derive(Debug, Clone)] +pub enum Source { + /// HTTPS GETs against `s3.amazonaws.com`. + PublicS3, + /// A directory containing `data.json.gz`, `commits.json`, and + /// `file-sizes-*.json.gz` files. + Local(PathBuf), +} + +impl Source { + /// Short human-readable description for log messages. + pub fn describe(&self) -> String { + match self { + Source::PublicS3 => "public S3 bucket".to_string(), + Source::Local(p) => format!("local dir {}", p.display()), + } + } + + /// Open `data.json.gz` for streaming, decompressing on the fly. + pub fn open_data_jsonl(&self) -> Result> { + let stream = self.open_raw("data.json.gz")?; + Ok(Box::new(BufReader::new(GzDecoder::new(stream)))) + } + + /// Open `commits.json` (uncompressed). + pub fn open_commits_jsonl(&self) -> Result> { + let stream = self.open_raw("commits.json")?; + Ok(Box::new(BufReader::new(stream))) + } + + /// Enumerate `file-sizes-*.json.gz` files. For local sources this + /// is a directory glob; for the public bucket we hit the documented + /// suite ids. + pub fn list_file_sizes(&self) -> Result> { + match self { + Source::Local(dir) => { + let mut out = Vec::new(); + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let name = entry.file_name(); + let s = name.to_string_lossy(); + if s.starts_with("file-sizes-") && s.ends_with(".json.gz") { + out.push(s.into_owned()); + } + } + out.sort(); + Ok(out) + } + Source::PublicS3 => { + // The S3 bucket's ListObjects is denied for unsigned + // requests, so we hit the documented per-suite keys + // emitted by `.github/workflows/sql-benchmarks.yml`. + Ok(KNOWN_FILE_SIZES_SUITES + .iter() + .map(|id| format!("file-sizes-{id}.json.gz")) + .collect()) + } + } + } + + /// Open one `file-sizes-*.json.gz` for streaming. + pub fn open_file_sizes(&self, name: &str) -> Result> { + let stream = self.open_raw(name)?; + Ok(Box::new(BufReader::new(GzDecoder::new(stream)))) + } + + fn open_raw(&self, name: &str) -> Result> { + match self { + Source::Local(dir) => open_local(&dir.join(name)), + Source::PublicS3 => open_s3(name), + } + } +} + +fn open_local(path: &Path) -> Result> { + let f = File::open(path).with_context(|| format!("opening {}", path.display()))?; + Ok(Box::new(f)) +} + +fn open_s3(name: &str) -> Result> { + let url = format!("{PUBLIC_BUCKET_BASE}/{name}"); + info!(url = %url, "GET"); + let resp = reqwest::blocking::get(&url).with_context(|| format!("GET {url}"))?; + if !resp.status().is_success() { + anyhow::bail!("GET {url} returned {}", resp.status()); + } + Ok(Box::new(resp)) +} + +/// Suite IDs we know publish a `file-sizes-{id}.json.gz` to S3. +/// +/// Source of truth: the `matrix.id` values in +/// `.github/workflows/sql-benchmarks.yml`'s `benchmark_matrix` default. +/// The post-bench `file-sizes` step uploads `file-sizes-${{ matrix.id +/// }}.json.gz`, so this list must match those IDs verbatim. Adding a +/// new matrix entry to that workflow means adding the same ID here. +pub(crate) const KNOWN_FILE_SIZES_SUITES: &[&str] = &[ + "clickbench-nvme", + "tpch-nvme", + "tpch-s3", + "tpch-nvme-10", + "tpch-s3-10", + "tpcds-nvme", + "statpopgen", + "fineweb", + "fineweb-s3", + "polarsignals", +]; diff --git a/benchmarks-website/migrate/src/v2.rs b/benchmarks-website/migrate/src/v2.rs new file mode 100644 index 00000000000..dd8190346bb --- /dev/null +++ b/benchmarks-website/migrate/src/v2.rs @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Wire shapes of the v2 benchmark dataset on S3. +//! +//! These types capture only the fields the migrator reads. v2 records +//! are serialized by `vortex-bench` (see `vortex-bench/src/measurements.rs`) +//! and by older non-Rust scripts; the union of fields is loose, so we +//! deserialize permissively (`serde(default)`, untyped `serde_json::Value` +//! for the polymorphic `dataset` field). + +use std::collections::BTreeMap; + +use serde::Deserialize; + +/// One JSONL line of `data.json.gz`. +/// +/// The shape is the union of every emitter's output. Most fields are +/// optional because different benches emit different subsets. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Record { + pub name: String, + #[serde(default)] + pub commit_id: Option, + #[serde(default)] + pub unit: Option, + #[serde(default)] + pub value: Option, + #[serde(default)] + pub storage: Option, + #[serde(default)] + pub dataset: Option, + #[serde(default)] + pub all_runtimes: Option>, + #[serde(default)] + pub env_triple: Option, +} + +/// `dataset` in v2 records is sometimes a string, sometimes an object +/// keyed by suite name (`{ "tpch": { "scale_factor": "10" } }`). 
+/// This helper looks up the scale factor for a given suite without +/// assuming a particular shape. +pub fn dataset_scale_factor(dataset: &serde_json::Value, key: &str) -> Option { + let obj = dataset.as_object()?; + let entry = obj.get(key)?; + let sf = entry.get("scale_factor")?; + match sf { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + _ => None, + } +} + +/// Canonicalize a v2 scale-factor string for use in `dataset_variant`. +/// +/// v2 emitters wrote scale factors as either `"1"`, `"1.0"`, `"10"`, or +/// `"10.0"` for the same logical SF, so the data.json.gz path +/// (`bin_compression_size`) and the file-sizes-*.json.gz path +/// (`migrate_file_sizes`) would otherwise produce different +/// `dataset_variant` strings and never collapse onto the same +/// `measurement_id`. Parse to f64 and format with no trailing zeros so +/// every shape collapses to one canonical form (`"1"`, `"10"`, `"0.1"`). +/// SF=1 is the implicit default and folds to `None`. +pub fn canonical_scale_factor(raw: Option<&str>) -> Option { + let s = raw?.trim(); + if s.is_empty() { + return None; + } + let value: f64 = s.parse().ok()?; + if value == 1.0 { + return None; + } + Some(format!("{value}")) +} + +/// Best-effort numeric coercion for the polymorphic `value` field. +pub fn value_as_f64(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::Number(n) => n.as_f64(), + serde_json::Value::String(s) => s.parse().ok(), + _ => None, + } +} + +/// Best-effort coercion of a runtime entry to nanoseconds. +pub fn runtime_as_i64(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Some(i) + } else { + n.as_f64().map(|f| f as i64) + } + } + serde_json::Value::String(s) => s.parse().ok(), + _ => None, + } +} + +/// Triple block as emitted by `vortex-bench`'s `--gh-json` path. v2 +/// stored it as an object; we serialize it back out as `arch-os-env`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2EnvTriple { + #[serde(default)] + pub architecture: Option, + #[serde(default)] + pub operating_system: Option, + #[serde(default)] + pub environment: Option, +} + +impl V2EnvTriple { + /// Format as the `arch-os-env` triple used by v3's `env_triple` column. + pub fn to_triple(&self) -> Option { + let arch = self.architecture.as_deref()?; + let os = self.operating_system.as_deref()?; + let env = self.environment.as_deref()?; + Some(format!("{arch}-{os}-{env}")) + } +} + +/// One JSONL line of `commits.json`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Commit { + pub id: String, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub message: Option, + #[serde(default)] + pub author: Option, + #[serde(default)] + pub committer: Option, + #[serde(default)] + pub tree_id: Option, + #[serde(default)] + pub url: Option, +} + +/// Author or committer block on a v2 commit record. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Person { + #[serde(default)] + pub name: Option, + #[serde(default)] + pub email: Option, +} + +/// One JSONL line of `file-sizes-*.json.gz` produced by +/// `scripts/capture-file-sizes.py`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2FileSize { + pub commit_id: String, + pub benchmark: String, + #[serde(default)] + pub scale_factor: Option, + pub format: String, + pub file: String, + pub size_bytes: i64, +} + +/// Build a sha-keyed map of commits. 
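+/// If `commits.json` repeats a sha, the later line wins, since the
+/// map `collect` overwrites on duplicate keys.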
+pub fn index_commits(commits: Vec) -> BTreeMap { + commits.into_iter().map(|c| (c.id.clone(), c)).collect() +} diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs new file mode 100644 index 00000000000..743dff1e528 --- /dev/null +++ b/benchmarks-website/migrate/src/verify.rs @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Structural diff between a migrated v3 DuckDB and the live v2 +//! `/api/metadata` endpoint. +//! +//! Compares group / chart structure only; values aren't compared +//! because v2 converts ns → ms and bytes → MiB on read while v3 +//! stores raw and the chart query divides. Group/chart structural +//! equivalence is enough to spot classifier regressions before +//! cutover. + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::path::Path; + +use anyhow::Context as _; +use anyhow::Result; +use duckdb::Connection; +use serde::Deserialize; + +use crate::classifier::QUERY_SUITES; + +/// Result of one `verify` run. +#[derive(Debug, Default)] +pub struct VerifyReport { + pub matched_groups: Vec, + pub only_in_v3: Vec, + pub only_in_v2: Vec, + pub chart_diffs: Vec, +} + +/// One group's chart-count divergence between v2 and v3, captured when the +/// group is structurally present on both sides but the counts differ. +#[derive(Debug, Clone)] +pub struct ChartDiff { + pub group: String, + pub v2_count: usize, + pub v3_count: usize, +} + +impl VerifyReport { + /// True if every v2 group is represented in v3. The CLI's exit + /// code reflects this. + pub fn v2_groups_covered(&self) -> bool { + self.only_in_v2.is_empty() + } +} + +impl std::fmt::Display for VerifyReport { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Groups in both v2 and v3:")?; + for g in &self.matched_groups { + writeln!(f, " + {g}")?; + } + if !self.only_in_v2.is_empty() { + writeln!(f, "Groups only in v2 (regression candidates):")?; + for g in &self.only_in_v2 { + writeln!(f, " - {g}")?; + } + } + if !self.only_in_v3.is_empty() { + writeln!(f, "Groups only in v3:")?; + for g in &self.only_in_v3 { + writeln!(f, " + {g}")?; + } + } + if !self.chart_diffs.is_empty() { + writeln!(f, "Chart count diffs:")?; + for d in &self.chart_diffs { + writeln!( + f, + " {} : v2={} v3={} (delta={})", + d.group, + d.v2_count, + d.v3_count, + d.v3_count as i64 - d.v2_count as i64, + )?; + } + } + Ok(()) + } +} + +/// v2's `/api/metadata` reply — only the fields we need. +#[derive(Debug, Deserialize)] +struct V2Metadata { + groups: BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct V2GroupMeta { + #[serde(default)] + charts: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2ChartMeta { + #[serde(default)] + name: String, +} + +/// Open the migrated DuckDB at `duckdb_path`, fetch `/api/metadata`, +/// and produce a structural diff. +pub fn run(v2_server: &str, duckdb_path: &Path) -> Result { + let v3 = collect_v3_groups(duckdb_path)?; + let v2 = fetch_v2_metadata(v2_server)?; + Ok(diff(&v2, &v3)) +} + +fn collect_v3_groups(duckdb_path: &Path) -> Result>> { + let conn = Connection::open(duckdb_path) + .with_context(|| format!("opening DuckDB at {}", duckdb_path.display()))?; + let mut groups: BTreeMap> = BTreeMap::new(); + + // query_measurements: chart per (dataset, query_idx); group per + // (dataset, dataset_variant, scale_factor, storage). 
+    // group display names so the verifier can compare apples to
+    // apples, so we re-format them here using the same suite table.
+    let mut stmt = conn.prepare(
+        r#"
+        SELECT dataset, dataset_variant, scale_factor, storage, query_idx
+        FROM query_measurements
+        GROUP BY dataset, dataset_variant, scale_factor, storage, query_idx
+        "#,
+    )?;
+    let rows = stmt.query_map([], |row| {
+        Ok((
+            row.get::<_, String>(0)?,
+            row.get::<_, Option<String>>(1)?,
+            row.get::<_, Option<String>>(2)?,
+            row.get::<_, String>(3)?,
+            row.get::<_, i32>(4)?,
+        ))
+    })?;
+    for row in rows {
+        let (dataset, _variant, sf, storage, query_idx) = row?;
+        let group_name = display_query_group(&dataset, sf.as_deref(), &storage);
+        let chart_name = chart_name_query(&dataset, query_idx);
+        groups
+            .entry(group_name)
+            .or_default()
+            .insert(normalize_chart(&chart_name));
+    }
+
+    // compression_times: group "Compression", charts per dataset.
+    let mut stmt = conn.prepare(
+        r#"
+        SELECT dataset, format, op
+        FROM compression_times
+        GROUP BY dataset, format, op
+        "#,
+    )?;
+    let rows = stmt.query_map([], |row| {
+        Ok((
+            row.get::<_, String>(0)?,
+            row.get::<_, String>(1)?,
+            row.get::<_, String>(2)?,
+        ))
+    })?;
+    for row in rows {
+        let (dataset, format, op) = row?;
+        let chart = chart_name_compression_time(&format, &op, &dataset);
+        groups
+            .entry("Compression".to_string())
+            .or_default()
+            .insert(normalize_chart(&chart));
+    }
+
+    let mut stmt = conn.prepare(
+        r#"
+        SELECT dataset, format
+        FROM compression_sizes
+        GROUP BY dataset, format
+        "#,
+    )?;
+    let rows = stmt.query_map([], |row| {
+        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
+    })?;
+    for row in rows {
+        let (_dataset, format) = row?;
+        let chart = chart_name_compression_size(&format);
+        groups
+            .entry("Compression Size".to_string())
+            .or_default()
+            .insert(normalize_chart(&chart));
+    }
+
+    let mut stmt = conn.prepare(
+        r#"
+        SELECT DISTINCT dataset
+        FROM random_access_times
+        "#,
+    )?;
+    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
+    for row in rows {
+        let dataset = row?;
+        groups
+            .entry("Random Access".to_string())
+            .or_default()
+            .insert(normalize_chart(&dataset));
+    }
+
+    Ok(groups)
+}
+
+fn fetch_v2_metadata(server: &str) -> Result<BTreeMap<String, BTreeSet<String>>> {
+    let url = format!("{}/api/metadata", server.trim_end_matches('/'));
+    let body = reqwest::blocking::get(&url)
+        .with_context(|| format!("GET {url}"))?
+        .error_for_status()
+        .with_context(|| format!("non-2xx from {url}"))?
+        .json::<V2Metadata>()
+        .with_context(|| format!("parsing {url} as v2 /api/metadata"))?;
+    let mut out: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
+    for (name, group) in body.groups {
+        let charts = group
+            .charts
+            .into_iter()
+            .map(|c| normalize_chart(&c.name))
+            .collect();
+        out.insert(name, charts);
+    }
+    Ok(out)
+}
+
+fn diff(
+    v2: &BTreeMap<String, BTreeSet<String>>,
+    v3: &BTreeMap<String, BTreeSet<String>>,
+) -> VerifyReport {
+    let mut report = VerifyReport::default();
+    let v2_keys: BTreeSet<&String> = v2.keys().collect();
+    let v3_keys: BTreeSet<&String> = v3.keys().collect();
+    for g in v2_keys.intersection(&v3_keys) {
+        report.matched_groups.push((**g).clone());
+        let v2_charts = &v2[*g];
+        let v3_charts = &v3[*g];
+        if v2_charts.len() != v3_charts.len() {
+            report.chart_diffs.push(ChartDiff {
+                group: (**g).clone(),
+                v2_count: v2_charts.len(),
+                v3_count: v3_charts.len(),
+            });
+        }
+    }
+    for g in v3_keys.difference(&v2_keys) {
+        report.only_in_v3.push((**g).clone());
+    }
+    for g in v2_keys.difference(&v3_keys) {
+        report.only_in_v2.push((**g).clone());
+    }
+    report.matched_groups.sort();
+    report.only_in_v3.sort();
+    report.only_in_v2.sort();
+    report
+}
+
+fn display_query_group(dataset: &str, scale_factor: Option<&str>, storage: &str) -> String {
+    let suite = QUERY_SUITES
+        .iter()
+        .find(|s| s.prefix.eq_ignore_ascii_case(dataset))
+        .copied();
+    match suite {
+        Some(suite) if suite.fan_out => {
+            let storage_disp = match storage {
+                "s3" | "S3" => "S3",
+                _ => "NVMe",
+            };
+            let sf = scale_factor.unwrap_or("1");
+            format!("{} ({}) (SF={})", suite.display_name, storage_disp, sf)
+        }
+        Some(suite) => suite.display_name.to_string(),
+        None => format!("{dataset} ({storage})"),
+    }
+}
+
+fn chart_name_query(dataset: &str, query_idx: i32) -> String {
+    let suite = QUERY_SUITES
+        .iter()
+        .find(|s| s.prefix.eq_ignore_ascii_case(dataset))
+        .copied();
+    match suite {
+        Some(suite) => format!("{} Q{}", suite.query_prefix, query_idx),
+        None => format!("{} Q{}", dataset.to_uppercase(), query_idx),
+    }
+}
+
+fn chart_name_compression_time(format: &str, op: &str, _dataset: &str) -> String {
+    // Re-derive the v2 chart name (the metric, not the dataset) so we
+    // can compare. v2's chart axis is the metric; series is the
+    // dataset. v3 inverts that. For structural comparison, we project
+    // back to v2's per-chart key.
+    match (format, op) {
+        ("vortex-file-compressed", "encode") => "COMPRESS TIME".into(),
+        ("vortex-file-compressed", "decode") => "DECOMPRESS TIME".into(),
+        ("parquet", "encode") => "PARQUET RS ZSTD COMPRESS TIME".into(),
+        ("parquet", "decode") => "PARQUET RS ZSTD DECOMPRESS TIME".into(),
+        ("lance", "encode") => "LANCE COMPRESS TIME".into(),
+        ("lance", "decode") => "LANCE DECOMPRESS TIME".into(),
+        _ => format!("{} {} TIME", format.to_uppercase(), op.to_uppercase()),
+    }
+}
+
+fn chart_name_compression_size(format: &str) -> String {
+    match format {
+        "vortex-file-compressed" => "VORTEX SIZE".into(),
+        "parquet" => "PARQUET SIZE".into(),
+        "lance" => "LANCE SIZE".into(),
+        _ => format!("{} SIZE", format.to_uppercase()),
+    }
+}
+
+/// Strip casing and `_-` differences between v2 and v3 chart names.
+/// v2 displays uppercase; v3 stores raw values. Comparing in this
+/// canonical form is enough for structural verification.
+fn normalize_chart(s: &str) -> String {
+    s.trim()
+        .to_uppercase()
+        .replace(['_', '-'], " ")
+        .split_whitespace()
+        .collect::<Vec<_>>()
+        .join(" ")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn normalize_chart_canonicalizes() {
+        assert_eq!(normalize_chart("taxi/take"), "TAXI/TAKE");
+        assert_eq!(normalize_chart("TAXI/TAKE"), "TAXI/TAKE");
+        assert_eq!(normalize_chart("tpc-h q1"), "TPC H Q1");
+        assert_eq!(normalize_chart("tpc h q1"), "TPC H Q1");
+    }
+
+    #[test]
+    fn display_query_group_handles_fan_out() {
+        assert_eq!(
+            display_query_group("tpch", Some("10"), "s3"),
+            "TPC-H (S3) (SF=10)"
+        );
+        assert_eq!(
+            display_query_group("tpch", Some("100"), "nvme"),
+            "TPC-H (NVMe) (SF=100)"
+        );
+        assert_eq!(
+            display_query_group("clickbench", None, "nvme"),
+            "Clickbench"
+        );
+    }
+}
diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs
new file mode 100644
index 00000000000..71e97cb6c9a
--- /dev/null
+++ b/benchmarks-website/migrate/tests/classifier.rs
@@ -0,0 +1,531 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Classifier behavior pinned by representative v2 names from each
+//! group in `benchmarks-website/server.js`'s `getGroup`.
+
+use rstest::rstest;
+use serde_json::json;
+use vortex_bench_migrate::classifier::Outcome;
+use vortex_bench_migrate::classifier::Skip;
+use vortex_bench_migrate::classifier::V3Bin;
+use vortex_bench_migrate::classifier::classify;
+use vortex_bench_migrate::classifier::classify_outcome;
+use vortex_bench_migrate::classifier::format_query;
+use vortex_bench_migrate::classifier::rename_engine;
+use vortex_bench_migrate::v2::V2Record;
+
+fn record(name: &str) -> V2Record {
+    V2Record {
+        name: name.to_string(),
+        commit_id: Some("deadbeef".into()),
+        unit: Some("ns".into()),
+        value: Some(json!(123)),
+        storage: None,
+        dataset: None,
+        all_runtimes: None,
+        env_triple: None,
+    }
+}
+
+fn record_with_storage_and_sf(name: &str, storage: &str, suite: &str, sf: &str) -> V2Record {
+    let mut r = record(name);
+    r.storage = Some(storage.into());
+    r.dataset = Some(json!({ suite: { "scale_factor": sf } }));
+    r
+}
+
+#[rstest]
+#[case::clickbench(
+    "clickbench_q07/datafusion:parquet",
+    V3Bin::Query {
+        dataset: "clickbench".into(),
+        dataset_variant: None,
+        scale_factor: None,
+        query_idx: 7,
+        storage: "nvme".into(),
+        engine: "datafusion".into(),
+        format: "parquet".into(),
+    },
+)]
+#[case::clickbench_vortex_renamed(
+    "clickbench_q12/datafusion:vortex-file-compressed",
+    V3Bin::Query {
+        dataset: "clickbench".into(),
+        dataset_variant: None,
+        scale_factor: None,
+        query_idx: 12,
+        storage: "nvme".into(),
+        engine: "datafusion".into(),
+        format: "vortex-file-compressed".into(),
+    },
+)]
+#[case::statpopgen(
+    "statpopgen_q3/datafusion:parquet",
+    V3Bin::Query {
+        dataset: "statpopgen".into(),
+        dataset_variant: None,
+        scale_factor: None,
+        query_idx: 3,
+        storage: "nvme".into(),
+        engine: "datafusion".into(),
+        format: "parquet".into(),
+    },
+)]
+#[case::polarsignals(
+    "polarsignals_q1/duckdb:parquet",
+    V3Bin::Query {
+        dataset: "polarsignals".into(),
+        dataset_variant: None,
+        scale_factor: None,
+        query_idx: 1,
+        storage: "nvme".into(),
+        engine: "duckdb".into(),
+        format: "parquet".into(),
+    },
+)]
+fn non_fan_out_query_records(#[case] name: &str, #[case] expected: V3Bin) {
+    let r = record(name);
+    assert_eq!(classify(&r), Some(expected));
+}
+
+#[rstest]
+#[case::tpch_s3_sf100(
+    "tpch_q01/datafusion:parquet",
"S3", + "tpch", + "100", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("100".into()), + query_idx: 1, + storage: "s3".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::tpch_nvme_sf1( + "tpch_q22/duckdb:vortex-file-compressed", + "NVMe", + "tpch", + "1", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("1".into()), + query_idx: 22, + storage: "nvme".into(), + engine: "duckdb".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::tpcds_nvme_sf10( + "tpcds_q05/datafusion:vortex-file-compressed", + "NVMe", + "tpcds", + "10", + V3Bin::Query { + dataset: "tpcds".into(), + dataset_variant: None, + scale_factor: Some("10".into()), + query_idx: 5, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "vortex-file-compressed".into(), + }, +)] +fn fan_out_query_records( + #[case] name: &str, + #[case] storage: &str, + #[case] suite: &str, + #[case] sf: &str, + #[case] expected: V3Bin, +) { + let r = record_with_storage_and_sf(name, storage, suite, sf); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::random_access_4_part( + "random-access/taxi/take/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + }, +)] +#[case::random_access_4_part_vortex( + "random-access/chimp/take/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "chimp/take".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::random_access_4_part_lance( + "random-access/taxi/take/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "lance".into(), + }, +)] +fn random_access_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::compress_time_vortex( + "compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "encode".into(), + }, +)] +#[case::decompress_time_vortex( + "decompress time/tpch_lineitem", + V3Bin::CompressionTime { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "decode".into(), + }, +)] +#[case::parquet_compress( + "parquet_rs-zstd compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + op: "encode".into(), + }, +)] +#[case::lance_decompress( + "lance decompress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "lance".into(), + op: "decode".into(), + }, +)] +fn compression_time_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::vortex_size( + "vortex size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::vortex_file_compressed_size_normalizes( + "vortex-file-compressed size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::parquet_size( + "parquet size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + }, +)] +#[case::lance_size( + "lance size/tpch_lineitem", + 
V3Bin::CompressionSize { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "lance".into(), + }, +)] +fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::ratio_vortex_parquet("vortex:parquet-zstd ratio compress time/clickbench")] +#[case::ratio_vortex_lance("vortex:lance ratio decompress time/clickbench")] +#[case::ratio_size_vortex_parquet("vortex:parquet-zstd size/clickbench")] +#[case::ratio_size_vortex_raw("vortex:raw size/clickbench")] +#[case::throughput("compress throughput/clickbench")] +#[case::nonsense_prefix("not-a-known-bench/series")] +#[case::random_access_3_part("random-access/taxi/parquet-tokio-local-disk")] +fn unmapped_records_yield_none(#[case] name: &str) { + let r = record(name); + assert_eq!( + classify(&r), + None, + "expected {name:?} to classify as None (drop)", + ); +} + +#[rstest] +#[case::parquet_2_part( + "random-access/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "parquet".into(), + }, +)] +#[case::vortex_2_part( + "random-access/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::lance_2_part( + "random-access/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "lance".into(), + }, +)] +fn random_access_2_part_legacy_recovered_as_taxi(#[case] name: &str, #[case] expected: V3Bin) { + // The 2-part shape `random-access/-tokio-local-disk` is + // emitted by `random-access-bench`'s legacy taxi run (no + // `AccessPattern`, see `measurement_name` in + // `benchmarks/random-access-bench/src/main.rs`). The live v3 + // emitter writes `dataset="taxi"` for those measurements, so the + // historical 2-part records on S3 must land in the same v3 + // chart instead of being dropped as `UnsupportedShape`. + let r = record(name); + assert_eq!( + classify(&r), + Some(expected), + "2-part legacy random-access must recover as dataset=taxi" + ); +} + +#[rstest] +#[case::parquet_footer("random-access/parquet-tokio-local-disk-footer")] +#[case::vortex_footer("random-access/vortex-tokio-local-disk-footer")] +#[case::lance_footer("random-access/lance-tokio-local-disk-footer")] +fn random_access_2_part_footer_is_deprecated(#[case] name: &str) { + // The reopen-mode `-footer` variant is a different access pattern + // (file is reopened per take). The live v3 emitter passes the + // bare `format.name()` for both reopen and cached, so it can't + // distinguish them on the wire. Keep migration consistent with + // that by routing `-footer` 2-part records to Skip::Deprecated + // (they don't strip clean to a v3-allowlisted format). + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)), + "2-part `-footer` random-access must be Skip::Deprecated" + ); +} + +#[rstest] +#[case::parquet_footer("random-access/taxi/correlated/parquet-tokio-local-disk-footer")] +#[case::vortex_footer("random-access/feature-vectors/uniform/vortex-tokio-local-disk-footer")] +#[case::lance_footer("random-access/nested-structs/correlated/lance-tokio-local-disk-footer")] +fn random_access_4_part_footer_is_deprecated(#[case] name: &str) { + // Same reasoning as 2-part `-footer`: the format string ends in + // `-tokio-local-disk-footer`, the strip_suffix doesn't match, and + // the unstripped value fails the V3_FORMATS allowlist. 
+ let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)), + "4-part `-footer` random-access must be Skip::Deprecated" + ); +} + +#[test] +fn parquet_zstd_size_is_deprecated() { + // `parquet-zstd` is not on the v3 emitter's format allowlist, so + // historical `parquet-zstd size/...` records bucket under + // Skip::Deprecated and don't render as orphan charts in v3. + let r = record("parquet-zstd size/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn vortex_parquet_zstd_ratio_is_intentional_skip() { + let r = record("vortex:parquet-zstd ratio compress time/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::DerivedRatio) + )); +} + +#[test] +fn vortex_parquet_zst_typo_ratio_is_intentional_skip() { + // `parquet-zst` (no trailing `d`) was emitted by some v2 runs. + // Both spellings should classify as derived ratios. + for name in [ + "vortex:parquet-zst ratio compress time/clickbench", + "vortex:parquet-zst ratio decompress time/clickbench", + ] { + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::DerivedRatio)), + "{name:?} should be DerivedRatio", + ); + } +} + +#[test] +fn throughput_is_intentional_skip() { + let r = record("compress throughput/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Throughput) + )); +} + +#[test] +fn unknown_prefix_is_unknown() { + let r = record("not-a-known-bench/series"); + assert!(matches!(classify_outcome(&r), Outcome::Unknown)); +} + +#[test] +fn gharchive_q00_is_deprecated() { + // gharchive isn't on the v3 query-suite allowlist, so historical + // gharchive query records bucket as Skip::Deprecated. + let r = record("gharchive_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn fineweb_q00_classifies() { + // fineweb is on V3_QUERY_SUITES (still emitted by v3 CI per + // .github/workflows/sql-benchmarks.yml's `fineweb` matrix entry), + // so historical fineweb records ingest like any other suite. + let r = record("fineweb_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Bin(V3Bin::Query { .. }) + )); +} + +#[test] +fn memory_record_is_historical_memory_skip() { + // v2 emitted `_q_memory/:` records that + // carry top-level memory fields V2Record doesn't deserialize. + // Skip them with a known variant so they don't trip the 5% gate. + let r = record("clickbench_q07_memory/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::HistoricalMemory) + )); +} + +#[test] +fn tpch_compression_size_carries_scale_factor() { + // The data.json.gz "vortex size/tpch" path needs to derive + // dataset_variant from the v2 record's `dataset` object, the same + // way the file-sizes path does. Otherwise SF=10 rows from the two + // sources never collide on `mid` and produce duplicate rows. 
+ let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "10" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset, "tpch"); + assert_eq!(dataset_variant, Some("10".into())); + assert_eq!(format, "vortex-file-compressed"); +} + +#[test] +fn tpch_compression_size_drops_default_scale_factor() { + // SF "1.0" matches the file-sizes path's filter and collapses to + // dataset_variant: None. + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "1.0" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset_variant, .. + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset_variant, None); +} + +#[rstest] +// SF=1 is the implicit default; both spellings must drop to None so +// `bin_compression_size` and `migrate_file_sizes` agree. +#[case::int_one("1", None)] +#[case::float_one("1.0", None)] +// SF=10 must produce the same canonical string regardless of spelling. +#[case::int_ten("10", Some("10".into()))] +#[case::float_ten("10.0", Some("10".into()))] +#[case::float_fractional("0.1", Some("0.1".into()))] +#[case::whitespace(" 10 ", Some("10".into()))] +#[case::empty("", None)] +fn compression_size_scale_factor_canonicalizes( + #[case] raw_sf: &str, + #[case] expected: Option, +) { + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": raw_sf } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset_variant, .. + }) = outcome + else { + panic!("expected Bin(CompressionSize) for sf={raw_sf:?}, got {outcome:?}"); + }; + assert_eq!(dataset_variant, expected, "sf={raw_sf:?}"); +} + +#[test] +fn engine_casing_lowercased() { + // Older v2 records emitted display-case engines like `DataFusion` + // and `DuckDB`. The classifier lowercases at push time so dedup + // collapses display-case rows into the canonical lowercase ones. + let r = record("clickbench_q07/DataFusion:parquet"); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::Query { engine, format, .. }) = outcome else { + panic!("expected Bin(Query), got {outcome:?}"); + }; + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); +} + +#[test] +fn rename_engine_pins_canonical_outputs() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("LANCE"), "lance"); +} + +#[test] +fn format_query_pins_v2_display() { + assert_eq!(format_query("clickbench_q00"), "CLICKBENCH Q0"); + assert_eq!(format_query("tpch_q22"), "TPC-H Q22"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("polarsignals_q1"), "POLARSIGNALS Q1"); + // Names that don't match a suite fall back to upper + " " replace. 
+ assert_eq!( + format_query("vortex-file-compressed size"), + "VORTEX FILE COMPRESSED SIZE" + ); +} diff --git a/benchmarks-website/migrate/tests/end_to_end.rs b/benchmarks-website/migrate/tests/end_to_end.rs new file mode 100644 index 00000000000..25bf5c0ad55 --- /dev/null +++ b/benchmarks-website/migrate/tests/end_to_end.rs @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Inline JSONL fixtures driven through the full migration into a +//! tempdir DuckDB. No live S3. + +use std::fs::File; +use std::io::Write; +use std::path::Path; + +use duckdb::Connection; +use flate2::Compression; +use flate2::write::GzEncoder; +use tempfile::TempDir; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; + +const COMMITS_JSONL: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture commit","author":{"name":"A","email":"a@example.com"},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"} +"#; + +const DATA_JSONL: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":42000,"all_runtimes":[41000,42000,43000]} +{"name":"compress time/clickbench","commit_id":"deadbeef","unit":"ns","value":99} +{"name":"vortex size/clickbench","commit_id":"deadbeef","unit":"bytes","value":1024} +{"name":"random-access/taxi/take/parquet-tokio-local-disk","commit_id":"deadbeef","unit":"ns","value":777,"all_runtimes":[700,777,800]} +"#; + +/// Build a local-source fixture directory. Caller supplies the contents +/// of `commits.json`, `data.json.gz`, and any number of +/// `file-sizes-*.json.gz` files (name → contents). +fn build_fixture(commits: &str, data: &str, file_sizes: &[(&str, &str)]) -> TempDir { + let dir = TempDir::new().expect("tempdir"); + write_text(&dir.path().join("commits.json"), commits); + write_gz(&dir.path().join("data.json.gz"), data); + for (name, body) in file_sizes { + write_gz(&dir.path().join(name), body); + } + dir +} + +fn write_text(path: &Path, body: &str) { + let mut f = File::create(path).unwrap(); + f.write_all(body.as_bytes()).unwrap(); +} + +fn write_gz(path: &Path, body: &str) { + let f = File::create(path).unwrap(); + let mut gz = GzEncoder::new(f, Compression::default()); + gz.write_all(body.as_bytes()).unwrap(); + gz.finish().unwrap(); +} + +#[test] +fn migrate_inline_fixture_populates_each_table() { + let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 4, "summary={summary}"); + assert_eq!(summary.uncategorized, 0, "summary={summary}"); + assert_eq!(summary.commits_inserted, 1); + assert_eq!(summary.query_inserted, 1); + assert_eq!(summary.compression_time_inserted, 1); + assert_eq!(summary.compression_size_inserted, 1); + assert_eq!(summary.random_access_inserted, 1); + + let conn = Connection::open(&target).unwrap(); + let count = |table: &str| -> i64 { + conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0)) + .unwrap() + }; + assert_eq!(count("commits"), 1); + assert_eq!(count("query_measurements"), 1); + assert_eq!(count("compression_times"), 1); + assert_eq!(count("compression_sizes"), 1); + assert_eq!(count("random_access_times"), 1); + + // Spot-check the v3 column values for each kind. 
+ let (engine, format, query_idx, value_ns): (String, String, i32, i64) = conn + .query_row( + "SELECT engine, format, query_idx, value_ns FROM query_measurements", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); + assert_eq!(query_idx, 7); + assert_eq!(value_ns, 42000); + + let (dataset, format, op): (String, String, String) = conn + .query_row( + "SELECT dataset, format, op FROM compression_times", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(op, "encode"); + + let (dataset, format, value_bytes): (String, String, i64) = conn + .query_row( + "SELECT dataset, format, value_bytes FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(value_bytes, 1024); + + let (dataset, format): (String, String) = conn + .query_row("SELECT dataset, format FROM random_access_times", [], |r| { + Ok((r.get(0)?, r.get(1)?)) + }) + .unwrap(); + assert_eq!(dataset, "taxi/take"); + assert_eq!(format, "parquet"); +} + +#[test] +fn dedup_collision_keeps_one_row() { + // Two data.json.gz lines whose query-measurement dim columns are + // identical (same commit / dataset / engine / format / query_idx, + // and `storage` collapses to "nvme" since `storage` is unset). + // Different `value`s. The accumulator's HashSet + // should drop the second one and bump `summary.deduped`. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 2, "summary={summary}"); + assert_eq!(summary.query_inserted, 1, "summary={summary}"); + assert_eq!(summary.deduped, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM query_measurements", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); +} + +#[test] +fn dedup_with_conflicting_value_ns_is_counted() { + // Same dim columns, different `value`s. Dedup keeps the first + // and bumps `deduped_with_conflict` because the dropped row's + // value_ns differed from the kept row's. This is the signal we + // care about when watching for silent value-corruption across + // duplicated v2 emissions. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 1, "summary={summary}"); +} + +#[test] +fn dedup_with_matching_value_ns_does_not_count_conflict() { + // Same dim columns AND identical `value`s. 
Dedup still drops the + // duplicate, but `deduped_with_conflict` stays 0. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 0, "summary={summary}"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge() { + // A `vortex size/tpch` record from data.json.gz and a + // file-sizes-tpch-nvme.json.gz row covering the same (commit, + // dataset, format, SF) tuple should produce the *same* + // measurement_id so the in-memory accumulator merges them into + // one row instead of two. + // + // Both sources use scale_factor "1.0", which both code paths + // filter out → dataset_variant: None on both sides → matching mid. + const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"1.0"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes): (i64, i64) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds value_bytes=200, file-sizes adds 100. + assert_eq!(value_bytes, 300); +} + +#[test] +fn empty_author_email_stored_as_null() { + // v2 sometimes wrote `""` for blank author/email/message. The + // migrator normalizes those to None so DuckDB stores SQL NULL, + // letting the UI distinguish "missing metadata" from "empty + // string". Here author.email is "" — verify the column is NULL, + // not the empty string. + const COMMITS: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture","author":{"name":"A","email":""},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"} +"#; + + let src_dir = build_fixture(COMMITS, "", &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let is_null: bool = conn + .query_row( + "SELECT author_email IS NULL FROM commits WHERE commit_sha = 'deadbeef'", + [], + |r| r.get(0), + ) + .unwrap(); + assert!(is_null, "empty author.email must store as SQL NULL"); + + // Non-empty fields still round-trip as strings. 
+ let committer_email: String = conn + .query_row( + "SELECT committer_email FROM commits WHERE commit_sha = 'deadbeef'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(committer_email, "c@example.com"); +} + +#[test] +fn open_target_db_removes_orphan_wal() { + // A `.wal` left from a previous crash with no main file present + // must still be removed so the next run starts from a known-empty + // state. Otherwise DuckDB can replay stale WAL into the fresh DB + // and corrupt subsequent inserts. + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + let wal = target_dir.path().join("v3.duckdb.wal"); + std::fs::write(&wal, b"orphan-wal-bytes").unwrap(); + assert!(wal.exists(), "precondition: orphan wal staged"); + assert!(!target.exists(), "precondition: no main db file"); + + let _conn = migrate::open_target_db(&target).unwrap(); + + // The migrator opens the DB after sweeping the WAL; DuckDB may + // recreate its own wal under load, but our pre-existing orphan + // bytes must not survive the sweep. We assert by content: either + // the path is missing, or its contents differ from the orphan we + // staged. + if wal.exists() { + let now = std::fs::read(&wal).unwrap(); + assert_ne!( + now, b"orphan-wal-bytes", + "orphan wal bytes must not survive open_target_db" + ); + } +} + +#[test] +fn file_sizes_unknown_id_falls_back_to_unknown_prefix() { + // A file-sizes-*.json.gz whose id isn't in + // `KNOWN_FILE_SIZES_SUITES`, with an empty `benchmark` field, used + // to surface as a bare id like `mystery-suite` and render as a + // dataset name. The migrator now prefixes those with `unknown:` + // so the UI can flag them. + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"","scale_factor":"","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":1000} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-mystery-suite.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let dataset: String = conn + .query_row("SELECT dataset FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(dataset, "unknown:mystery-suite"); +} + +#[test] +fn file_sizes_known_id_uses_id_directly() { + // For a KNOWN_FILE_SIZES_SUITES id, the fallback path keeps the + // raw id (no `unknown:` prefix). `clickbench-nvme` is on the list. + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"","scale_factor":"","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":1000} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-clickbench-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let dataset: String = conn + .query_row("SELECT dataset FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(dataset, "clickbench-nvme"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge_with_canonical_sf() { + // Same logical SF written as `"10"` on the data.json.gz side and + // `"10.0"` on the file-sizes side. Both paths must canonicalize + // to `"10"` so the rows share a `measurement_id` and merge into + // one compression_sizes row. 
+ const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"10"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"10.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme-10.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes, dataset_variant): (i64, i64, String) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes), MAX(dataset_variant) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds 200, file-sizes adds 100. + assert_eq!(value_bytes, 300); + assert_eq!(dataset_variant, "10"); +} + +#[test] +fn summary_counts_match_actual_rows_on_success() { + // Sister test to migrate::tests::flush_all_does_not_overcount_on_failure. + // On a fully successful run, the post-flush summary counters must + // equal `SELECT COUNT(*)` from each fact table. This is the + // invariant the flush-after-count refactor preserves. + let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let actual = |table: &str| -> u64 { + let n: i64 = conn + .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0)) + .unwrap(); + n as u64 + }; + assert_eq!(summary.query_inserted, actual("query_measurements")); + assert_eq!( + summary.compression_time_inserted, + actual("compression_times") + ); + assert_eq!( + summary.compression_size_inserted, + actual("compression_sizes") + ); + assert_eq!( + summary.random_access_inserted, + actual("random_access_times") + ); +} + +#[test] +fn file_sizes_sum_into_one_row() { + // Two file-sizes rows sharing (commit, benchmark, format, + // scale_factor) and value_bytes 100 + 200 must collapse to a + // single compression_sizes row with 300. 
+ const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-1.vortex","size_bytes":200} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-clickbench.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.file_size_inserted, 2, "summary={summary}"); + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); + let value_bytes: i64 = conn + .query_row("SELECT value_bytes FROM compression_sizes", [], |r| { + r.get(0) + }) + .unwrap(); + assert_eq!(value_bytes, 300); +} diff --git a/benchmarks-website/planning/00-overview.md b/benchmarks-website/planning/00-overview.md new file mode 100644 index 00000000000..c6c3e05c57f --- /dev/null +++ b/benchmarks-website/planning/00-overview.md @@ -0,0 +1,104 @@ + + +# 00 - Overview + +## What we're building + +A replacement for the current `bench.vortex.dev` site. The new +stack is a **single Rust binary** (axum + maud + duckdb-rs) that +owns a **DuckDB database** on local disk and serves the website +plus an `/api/ingest` route. CI eventually POSTs new benchmark +results there. There is no separate ingester service, no S3 +coordination layer for writes, no client-side WASM. + +The server crate is `vortex-bench-server` at +`benchmarks-website/server/`. + +## Phasing + +We build this in two phases. **Plan only the first.** + +### Alpha (this plan) + +The smallest end-to-end loop that proves the design: + +1. **Schema** locked enough to ingest one benchmark result. +2. **Server**: open DuckDB, accept a bearer-token-authenticated POST, + serve a couple of read routes. +3. **Emitter**: `vortex-bench --gh-json-v3` + a tiny POST script. +4. **Web UI**: one landing page + one chart page rendered against a + fixture DB. + +That's it. No production deploy, no historical data import, no CI +workflow integration, no admin tooling, no schema migration +framework, no auth beyond the shared bearer token. All of those +live in [`deferred.md`](./deferred.md). + +The alpha runs on a developer machine. v2 keeps running in +production unchanged. There is no cutover in alpha. + +### Phase 2 and beyond + +Once the alpha loop is green, we layer in production deploy, +historical migration, CI dual-write, and the rest of the v2-parity +work. Stubs are in [`deferred.md`](./deferred.md). + +## Architecture (alpha) + +One process, one DB file. The server is the API and the website. +The emitter writes JSONL of bare records; a small POST script +wraps and uploads them. CI isn't wired up yet; ingest happens +manually during alpha. + +## Components + +Three components for alpha. Each is one workstream, one branch, one +PR. 
+ +| Component | Plan | Owns | +|---|---|---| +| Server | [components/server.md](./components/server.md) | DuckDB open + schema, bearer-auth ingest, read routes, HTML routes mounted from web-ui | +| Emitter | [components/emitter.md](./components/emitter.md) | `vortex-bench --gh-json-v3` + the post-ingest script | +| Web UI | [components/web-ui.md](./components/web-ui.md) | Landing page + chart page, against a fixture DuckDB | + +### Dependencies + +The schema feeds all three components. The contracts feed the +server and the emitter. With both stable, **all three components +can be worked on in parallel**. + +## Goals + +In priority order: + +1. **End-to-end alpha loop works.** Emit → POST → store → render. +2. **Schema is the right shape.** Five fact tables (one per + measurement family) plus a `commits` dim. See + [`01-schema.md`](./01-schema.md). +3. **Each component is small enough that one agent can finish it + in one PR.** No mega-PRs. + +Cutover, parity, and "faster than v2" are explicit non-goals at +alpha; they come back in phase 2. + +## Shared docs + +- [`00-overview.md`](./00-overview.md) (this file) +- [`01-schema.md`](./01-schema.md) - the five fact tables + `commits` +- [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP error + matrix + auth header +- [`benchmark-mapping.md`](./benchmark-mapping.md) - existing + benchmarks → fact tables +- [`decisions.md`](./decisions.md) - resolved decisions +- [`deferred.md`](./deferred.md) - phase-2 stubs + +## Status of v2 during alpha + +v2 stays in production untouched. Do not edit +`benchmarks-website/server.js`, `benchmarks-website/src/`, or any +other v2 files at `benchmarks-website/` top level. v3 lives in the +sibling subdirectory at `benchmarks-website/server/` +(`vortex-bench-server` crate). diff --git a/benchmarks-website/planning/01-schema.md b/benchmarks-website/planning/01-schema.md new file mode 100644 index 00000000000..dfc6b05ba27 --- /dev/null +++ b/benchmarks-website/planning/01-schema.md @@ -0,0 +1,228 @@ + + +# 01 - DuckDB schema (alpha) + +The persistent data model. **One `commits` dim table plus five fact +tables, one per measurement family.** No lookup tables, no views, no +migration framework; those are deferred (see +[`deferred.md`](./deferred.md)). + +## Design principles + +1. **One fact table per (dim shape, value shape).** A row in any + fact table has every value column populated; NULLs only appear + in genuinely optional dimensions. +2. **No discriminator columns spanning families.** No `metric_kind` + enum forcing five shapes into one row. +3. **No JSON escape hatch.** New benchmark parameters become real + columns. Adding a nullable column is cheap; the readability win + is worth it. +4. **Hashed primary key per table.** Each fact table has a + `measurement_id` that is a deterministic 64-bit hash of + `commit_sha` plus that table's dimensional tuple. Including + `commit_sha` makes every (commit, dim) pair a distinct row - + that's what the chart pages render as a time series. + Server-internal; not on the wire. +5. **`commits` is the only dim table.** Engine, format, dataset, + etc. stay as inline strings; DuckDB's dictionary encoding makes + a lookup table pointless. +6. **Ratios are not stored.** Computed at query time from + `compression_sizes`. 
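+
+As a concrete (but purely illustrative) reading of principle 6, a chart
+query can derive a vortex-vs-parquet size ratio at read time with a
+self-join over `compression_sizes`. The function name, the format pair,
+and the exact SQL below are assumptions, not the server's real query:
+
+```rust
+use anyhow::Result;
+use duckdb::Connection;
+use duckdb::params;
+
+/// Sketch only: per-commit ratio of vortex size to parquet size for one
+/// dataset, computed at query time instead of being stored as a row.
+fn size_ratio_series(conn: &Connection, dataset: &str) -> Result<Vec<(String, f64)>> {
+    let mut stmt = conn.prepare(
+        r#"
+        SELECT v.commit_sha,
+               CAST(v.value_bytes AS DOUBLE) / p.value_bytes AS ratio
+        FROM compression_sizes v
+        JOIN compression_sizes p
+          ON p.commit_sha = v.commit_sha
+         AND p.dataset = v.dataset
+         AND p.format = 'parquet'
+        WHERE v.dataset = ? AND v.format = 'vortex-file-compressed'
+        "#,
+    )?;
+    let rows = stmt.query_map(params![dataset], |row| {
+        Ok((row.get::<_, String>(0)?, row.get::<_, f64>(1)?))
+    })?;
+    let mut out = Vec::new();
+    for row in rows {
+        out.push(row?);
+    }
+    Ok(out)
+}
+```
+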
+ +## Why five fact tables, not one + +The five families have genuinely different shapes: + +| Table | Shape sketch | +|---|---| +| `query_measurements` | dataset + query_idx + engine + format + storage → timing **and** memory | +| `compression_times` | dataset + format + op∈{encode,decode} → timing | +| `compression_sizes` | dataset + format → bytes | +| `random_access_times` | dataset + format → timing (different dataset namespace) | +| `vector_search_runs` | dataset + layout + flavor + threshold → timing + counters | + +Forcing them into one table either bloats every row with columns +that are NULL for ~99% of rows (`layout`, `flavor`, `threshold`, +`matches`, `rows_scanned`, `bytes_scanned`) or splits scan results +across multiple rows that have to be re-joined to render one chart. + +## Group / chart / series fit + +The render-time view used by `/api/groups` and `/api/chart/:slug` +is mechanically derivable per table: + +| Table | Group key | Chart key | Series key | +|---|---|---|---| +| `query_measurements` | `(dataset, dataset_variant, scale_factor, storage)` | `(dataset, query_idx)` | `(engine, format)` | +| `compression_times` | constant `"Compression"` | `(dataset, dataset_variant)` | `(format, op)` | +| `compression_sizes` | constant `"Compression Size"` | `(dataset, dataset_variant)` | `format` | +| `random_access_times` | constant `"Random Access"` | `dataset` | `format` | +| `vector_search_runs` | `(dataset, layout)` | `(dataset, layout, threshold)` | `flavor` | + +The classifier logic in v2's `v2-classifier.js` mostly disappears - +each table already knows what suite it represents. + +## Tables + +DDL is the server's call. Below is the column contract: name, type +family, and whether it's NOT NULL. The server agent picks exact +DuckDB types, indexes, and constraint syntax. + +### `commits` (dim) + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `commit_sha` | string | yes (PK) | 40-hex lowercase | +| `timestamp` | timestamptz | yes | | +| `message` | string | optional | first line only | +| `author_name` | string | optional | | +| `author_email` | string | optional | | +| `committer_name` | string | optional | | +| `committer_email` | string | optional | | +| `tree_sha` | string | yes | | +| `url` | string | yes | | + +Populated from the envelope on every `/api/ingest` call. + +### `query_measurements` + +SQL query suites: TPC-H, TPC-DS, ClickBench, StatPopGen, +PolarSignals, Fineweb, GhArchive, Public-BI. Memory columns are +populated when the run was instrumented for memory; NULL otherwise. +Timing and memory share the row because they're produced together +for the same query execution. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | hash of dim tuple | +| `commit_sha` | string | yes | FK to `commits` | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | int32 | yes | 1-based | +| `storage` | string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... 
| +| `value_ns` | int64 | yes | median timing, ns | +| `all_runtimes_ns` | list<int64> | yes | per-iteration timings | +| `peak_physical` | int64 | optional | bytes | +| `peak_virtual` | int64 | optional | bytes | +| `physical_delta` | int64 | optional | bytes | +| `virtual_delta` | int64 | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +### `compression_times` + +Encode/decode timings from `compress-bench`. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | string | yes | `encode` or `decode` | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `compression_sizes` + +On-disk sizes from `compress-bench`. One-shot, no per-iteration data. +Compression ratios in v2 (`vortex:parquet-zstd ratio/...`) are a +SELECT over this table joined to itself; they're not stored. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | int64 | yes | | + +### `random_access_times` + +Take-time timings from `random-access-bench`. Different dataset +namespace from `compression_times` - kept in its own table so +dataset filters never have to disambiguate which suite a row +belongs to. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `format` | string | yes | | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_runs` + +Cosine-similarity scans from `vector-search-bench`. The only family +that emits a timing **plus side counters** for the same scan; +keeping them in one row avoids a 1:N split that has to be re-joined +on read. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` | +| `threshold` | double | yes | cosine threshold | +| `value_ns` | int64 | yes | per-scan wall time | +| `all_runtimes_ns` | list<int64> | yes | | +| `matches` | int64 | yes | | +| `rows_scanned` | int64 | yes | | +| `bytes_scanned` | int64 | yes | | +| `iterations` | int32 | yes | not part of the dim hash | +| `env_triple` | string | optional | | + +## `measurement_id` hash + +Per-table xxhash64 over `commit_sha` plus that table's dimensional +tuple. Including `commit_sha` makes every (commit, dim) pair a +distinct row, which is what the chart pages render as a time +series. The hash is **server-internal** - the wire never carries +it. The server's INSERT path computes it before each +`INSERT ... ON CONFLICT DO UPDATE`, which gives idempotent upsert +on re-emission of the same (commit, dim) pair. Encoding details +(input order, NULL handling, byte layout) are the server's call, +since the value never crosses a process boundary. 
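+
+One possible encoding, shown only to make the idea concrete; the crate
+choice (`twox_hash` here), the field order, and the NULL/terminator bytes
+are all assumptions, since the byte layout is left to the server:
+
+```rust
+use std::hash::Hasher;
+use twox_hash::XxHash64;
+
+/// Illustrative `measurement_id` for a `compression_sizes` row:
+/// xxhash64 over `commit_sha` plus that table's dimensional tuple.
+fn compression_size_measurement_id(
+    commit_sha: &str,
+    dataset: &str,
+    dataset_variant: Option<&str>,
+    format: &str,
+) -> i64 {
+    let mut h = XxHash64::with_seed(0);
+    for field in [Some(commit_sha), Some(dataset), dataset_variant, Some(format)] {
+        match field {
+            // Tag present fields so NULL and the empty string hash differently.
+            Some(s) => {
+                h.write(&[1]);
+                h.write(s.as_bytes());
+            }
+            None => h.write(&[0]),
+        }
+        // Terminate each field so adjacent fields can't run together.
+        h.write(&[0xFF]);
+    }
+    h.finish() as i64
+}
+```
+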
+ +When the historical migrator lands (deferred), it reuses the +server's hash function via a shared crate. + +## Storage values + +`storage` is `'nvme'` or `'s3'`. Legacy `gcs` is dropped. Only +`query_measurements` carries `storage` - the other families don't +fan out by storage backend. + +## Schema changes during alpha + +There is no migration framework. If you change the schema: + +1. Update this doc. +2. Update the server's DDL. +3. Delete any local `bench.duckdb` and re-run. + +A real forward-only migration framework lands post-alpha. See +[`deferred.md`](./deferred.md). + +## What's intentionally NOT here (deferred) + +- `schema_meta` and migration framework. +- `known_engines` / `known_formats` / `known_datasets` lookup + tables and seed SQL. +- Views (`v_compression_ratios`, `v_latest_per_group`, etc.). +- Pre-downsampled aliases. +- A `microbench_runs` table - reserved as the next family to add + when microbench results start landing. diff --git a/benchmarks-website/planning/02-contracts.md b/benchmarks-website/planning/02-contracts.md new file mode 100644 index 00000000000..5d995c7120d --- /dev/null +++ b/benchmarks-website/planning/02-contracts.md @@ -0,0 +1,210 @@ + + +# 02 - Wire contracts (alpha) + +The cross-component glue between the emitter, the POST script, and +the server. Wire-format only - implementations are local to each +component. + +If two components disagree about a shape, **this file is right** +and both update. + +## Records are discriminated by `kind` + +Each record on the wire carries a `kind` field that picks one of +the [five fact tables](./01-schema.md#tables). The emitter never +decides "what column" - it decides "what kind", and the rest of the +row is that kind's flat field set. + +| `kind` | Destination table | +|---|---| +| `query_measurement` | `query_measurements` | +| `compression_time` | `compression_times` | +| `compression_size` | `compression_sizes` | +| `random_access_time` | `random_access_times` | +| `vector_search_run` | `vector_search_runs` | + +**Unknown `kind` values cause a 400.** Unknown fields within a known +`kind` also cause a 400. Version skew should fail loudly. + +## Per-kind record shapes + +All shared metadata first; per-kind fields after. + +### `query_measurement` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"query_measurement"` | yes | discriminator | +| `commit_sha` | string | yes | 40-hex lowercase | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | integer | yes | 1-based | +| `storage` | enum string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... | +| `value_ns` | integer | yes | median timing, ns | +| `all_runtimes_ns` | array<integer> | yes | per-iteration timings (may be empty) | +| `peak_physical` | integer | optional | bytes | +| `peak_virtual` | integer | optional | bytes | +| `physical_delta` | integer | optional | bytes | +| `virtual_delta` | integer | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +The four memory fields are populated together (all four or none). + +### `compression_time` + +| Field | Type | Required? 
| Notes | +|---|---|---|---| +| `kind` | `"compression_time"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | enum string | yes | `encode` or `decode` | +| `value_ns` | integer | yes | | +| `all_runtimes_ns` | array<integer> | yes | | +| `env_triple` | string | optional | | + +### `compression_size` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"compression_size"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | integer | yes | | + +### `random_access_time` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"random_access_time"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | random-access dataset name (e.g. `chimp`, `taxi`) | +| `format` | string | yes | | +| `value_ns` | integer | yes | | +| `all_runtimes_ns` | array<integer> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_run` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"vector_search_run"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` | +| `threshold` | number | yes | cosine threshold | +| `value_ns` | integer | yes | per-scan wall time (median of iterations) | +| `all_runtimes_ns` | array<integer> | yes | | +| `matches` | integer | yes | | +| `rows_scanned` | integer | yes | | +| `bytes_scanned` | integer | yes | | +| `iterations` | integer | yes | | +| `env_triple` | string | optional | | + +## Ingest envelope + +`/api/ingest` accepts one envelope per POST. The envelope wraps a +heterogeneous batch of records (any mix of `kind`s). Required +top-level fields: + +- `run_meta`: object with `benchmark_id` (string), `schema_version` + (integer; `1` at alpha), `started_at` (RFC 3339 timestamp). +- `commit`: object with the columns of the [`commits` + table](./01-schema.md#commits-dim), keyed by their column names + with `commit_sha` renamed to `sha`. The server upserts this row + before applying records. +- `records`: array of per-`kind` records as defined above. + +`vortex-bench --gh-json-v3 ` writes JSONL of bare records +only. The envelope (`run_meta` + `commit`) is added by the +post-ingest script before POSTing - this keeps the Rust emitter +dependency-light. + +The post-ingest script is responsible for filling the `commit` +fields. CI has the SHA from `${{ github.sha }}`; the rest comes +from `git show` or equivalent. See +[`components/emitter.md`](./components/emitter.md). + +## HTTP matrix for `POST /api/ingest` + +| Condition | Status | +|---|---| +| Happy path | 200 with `{ "inserted": N, "updated": M }` | +| Malformed JSON | 400 | +| Unknown `kind`, unknown field, or per-record validation failure | 400 with the offending record index | +| Missing/invalid bearer token | 401 | +| Schema version newer than server expects | 409 | +| Other server error | 500 | + +All-or-nothing per POST: a single failed record fails the whole +batch. The reported `inserted` and `updated` counts are aggregated +across all five tables. + +## Authentication header + +```text +Authorization: Bearer +``` + +Compared with constant-time equality on the server. Token comes from +the `INGEST_BEARER_TOKEN` env var. 
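+
+Putting the envelope, one record, and the auth header together, a client
+call might look like the sketch below. The URL handling, fixture values,
+and the use of `reqwest::blocking` are illustrative; in CI this role is
+played by the post-ingest script, not a Rust client:
+
+```rust
+use anyhow::Result;
+use serde_json::json;
+
+/// Sketch of one `/api/ingest` POST under the alpha contract.
+fn post_one_envelope(server: &str, token: &str) -> Result<()> {
+    let envelope = json!({
+        "run_meta": {
+            "benchmark_id": "compress",
+            "schema_version": 1,
+            "started_at": "2026-04-25T00:00:00Z"
+        },
+        "commit": {
+            "sha": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef",
+            "timestamp": "2026-04-25T00:00:00Z",
+            "message": "fixture commit",
+            "tree_sha": "abcd0001",
+            "url": "https://example.com/commit/deadbeef"
+        },
+        "records": [{
+            "kind": "compression_size",
+            "commit_sha": "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef",
+            "dataset": "clickbench",
+            "format": "vortex-file-compressed",
+            "value_bytes": 1024
+        }]
+    });
+    let response = reqwest::blocking::Client::new()
+        .post(format!("{}/api/ingest", server.trim_end_matches('/')))
+        .bearer_auth(token)
+        .header("content-type", "application/json")
+        .body(envelope.to_string())
+        .send()?
+        .error_for_status()?;
+    // Happy path returns `{ "inserted": N, "updated": M }`.
+    println!("{}", response.text()?);
+    Ok(())
+}
+```
+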
+ +## Slug grammar (server ↔ web-ui) + +The web-ui receives slugs from `/api/groups` and feeds them back +into `/api/chart/:slug`. Slugs are **opaque strings** as far as the +web-ui is concerned: it never parses or constructs them itself, +only echoes what the API returned. The server is free to choose any +slug format, change it without breaking the web-ui, or make it +debuggable (e.g. `qm-tpch-q01-nvme-sf1`) - the only contract is +"`/api/chart/:slug` accepts any slug `/api/groups` returned." + +## Read API + +Four JSON routes today. Field shapes are not binding; refine during +implementation. + +### `GET /api/groups` + +A flat list of distinct group keys derivable from the data, with +just enough metadata to link to a chart. The server walks each fact +table to produce the group keys defined in +[`01-schema.md`](./01-schema.md#group--chart--series-fit). Every +chart entry includes a `slug` that round-trips through +`/api/chart/:slug`, and every group has its own `slug` that +round-trips through `/api/group/:slug`. + +### `GET /api/chart/:slug` + +Returns the data for one chart: a `display_name`, a `unit`, an +ordered `commits` list (sha + timestamp + first-line message + url), +and a `series` map keyed by series name where each value is an +array aligned to `commits` (with `null` for missing data points). +Accepts `?n=&y=&mode=&hidden=` to scope the commit window and +configure the rendered view. + +### `GET /api/group/:slug` + +Returns every chart in a group as a single batch payload, in render +order. Used by the `/group/{slug}` HTML page and (today) by the +landing page hydration path. Same query parameters as +`/api/chart/:slug`. + +### `GET /health` + +Returns `{ status, db_path, schema_version, latest_commit_timestamp, +row_counts }`. Cheap; suitable for load-balancer health checks. + +Per-commit page, range queries, and the rest of the read API are +deferred. See [`deferred.md`](./deferred.md). diff --git a/benchmarks-website/planning/AGENTS.md b/benchmarks-website/planning/AGENTS.md new file mode 100644 index 00000000000..2f7c3710e92 --- /dev/null +++ b/benchmarks-website/planning/AGENTS.md @@ -0,0 +1,171 @@ + + +# AGENTS.md - benchmarks-website v3 + +Brief for coding agents working on the v3 rewrite of `bench.vortex.dev`. Keep this file short. +Detail belongs in component plans. + +## Status + +Alpha is shipped. The v3 server, migrator, and inline-charts UI are all merged to +`ct/benchmarks-v3`. The current focus is **production readiness**: secrets, CI ingestion wiring, +smoke-testing on a real host, the DNS flip, and v2 cleanup. See [`README.md`](./README.md) for the +live punch list. + +The v2 site (top-level files in `benchmarks-website/`: `server.js`, `src/`, `package.json`, +`index.html`, `Dockerfile`, `docker-compose.yml`, `ec2-init.txt`, etc.) is still in production on +`bench.vortex.dev` and **stays running unchanged** until the DNS flip. The v3 server lives alongside +it as `vortex-bench-server` at `benchmarks-website/server/`. + +## Architecture in 10 bullets + +- Single Rust binary: `axum` (HTTP) + `maud` (SSR HTML) + embedded `duckdb-rs`. All static assets + (`chart.umd.js`, `chart-init.js`, `style.css`) are `include_bytes!`'d into the binary. No CDN. + A `tower-http` `CompressionLayer` wraps every response (gzip/brotli). +- One DuckDB file on local disk holds five fact tables (compression time, query measurement, vector + search, RAG, random access) plus a `commits` dim table. Schema in + [`01-schema.md`](./01-schema.md). 
+- One ingest endpoint: `POST /api/ingest`, gated by a static bearer token from the + `INGEST_BEARER_TOKEN` env var. Wire shapes in [`02-contracts.md`](./02-contracts.md). +- Three HTML routes — `/`, `/chart/{slug}`, `/group/{slug}` — and four JSON routes — + `GET /api/groups`, `GET /api/chart/{slug}`, `GET /api/group/{slug}`, `GET /health` — all served + from the same binary. +- `ChartKey` and `GroupKey` enums round-trip through URLs as `.` + slugs. No DB lookup required to decode a URL. +- Charts render inline on the landing page. Each `` is paired with a + `"}"#; + let out = escape_json_for_script(input); + assert!(!out.contains(" assert_eq!(n.get(), 25), + CommitWindow::All => panic!(), + } + let ui = UiQuery { + n: Some("all".into()), + ..Default::default() + }; + assert!(matches!(ui.fetch_window(), CommitWindow::All)); + } + + #[test] + fn filter_state_parses_csv_and_dedupes() { + let ui = UiQuery { + engine: Some("duckdb, datafusion ,duckdb".into()), + format: Some(",, vortex-file-compressed ,".into()), + ..Default::default() + }; + let f = ui.filter_state(); + assert_eq!(f.engines, vec!["duckdb", "datafusion"]); + assert_eq!(f.formats, vec!["vortex-file-compressed"]); + } + + #[test] + fn filter_state_default_is_empty() { + let f = UiQuery::default().filter_state(); + assert!(f.engines.is_empty()); + assert!(f.formats.is_empty()); + } +} diff --git a/benchmarks-website/server/src/ingest.rs b/benchmarks-website/server/src/ingest.rs new file mode 100644 index 00000000000..1edf3acf126 --- /dev/null +++ b/benchmarks-website/server/src/ingest.rs @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! `POST /api/ingest` handler. +//! +//! All-or-nothing per envelope: every record is upserted in a single DuckDB +//! transaction or none of them are. The reported `inserted`/`updated` counts +//! aggregate across all five fact tables. + +use anyhow::Context as _; +use anyhow::Result; +use axum::Json; +use axum::extract::State; +use axum::response::IntoResponse; +use duckdb::Connection; +use duckdb::params; +use serde::Serialize; +use serde_json::Value; + +use crate::app::AppState; +use crate::db::measurement_id_compression_size; +use crate::db::measurement_id_compression_time; +use crate::db::measurement_id_query; +use crate::db::measurement_id_random_access; +use crate::db::measurement_id_vector_search; +use crate::db::{self}; +use crate::error::IngestError; +use crate::records::CommitInfo; +use crate::records::Envelope; +use crate::records::QueryMeasurement; +use crate::records::Record; +use crate::records::VectorSearchRun; +use crate::schema::SCHEMA_VERSION; + +/// Successful ingest response body. +#[derive(Debug, Serialize)] +pub struct IngestResponse { + pub inserted: u64, + pub updated: u64, +} + +/// Handler for `POST /api/ingest`. Bearer auth is enforced by the +/// [`crate::auth::require_bearer`] middleware, not here. 
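+// Illustrative accepted body (shape per `02-contracts.md`; all values made up):
+//
+//   { "run_meta": { "benchmark_id": "compress", "schema_version": 1,
+//                   "started_at": "2024-01-01T00:00:00Z" },
+//     "commit":   { "sha": "<40-hex>", "timestamp": "...", "message": "...",
+//                   "author_name": "...", "author_email": "...",
+//                   "committer_name": "...", "committer_email": "...",
+//                   "tree_sha": "<40-hex>", "url": "..." },
+//     "records":  [ { "kind": "compression_size", "commit_sha": "<40-hex>",
+//                     "dataset": "tpch", "format": "parquet",
+//                     "value_bytes": 123456 } ] }
+//
+// A successful response body is `{"inserted": N, "updated": M}`, aggregated
+// across all five fact tables.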
+pub async fn handle( + State(state): State, + body: Json, +) -> Result { + let Json(value) = body; + let envelope: Envelope = + serde_json::from_value(value).map_err(|e| IngestError::Malformed(e.to_string()))?; + validate_envelope(&envelope)?; + + let response = db::run_blocking(&state.db, move |conn| apply_envelope(conn, envelope)) + .await + .map_err(|err| match err.downcast::() { + Ok(ingest) => ingest, + Err(other) => IngestError::Internal(other), + })?; + Ok(Json(response)) +} + +fn validate_envelope(env: &Envelope) -> Result<(), IngestError> { + if env.run_meta.schema_version > SCHEMA_VERSION { + return Err(IngestError::SchemaVersionTooNew { + expected: SCHEMA_VERSION, + got: env.run_meta.schema_version, + }); + } + if env.run_meta.schema_version < SCHEMA_VERSION { + return Err(IngestError::Malformed(format!( + "schema_version {} is older than server's {}", + env.run_meta.schema_version, SCHEMA_VERSION + ))); + } + Ok(()) +} + +fn apply_envelope(conn: &mut Connection, env: Envelope) -> Result { + let tx = conn.transaction().context("begin transaction")?; + + upsert_commit(&tx, &env.commit).context("upsert commit")?; + + let mut inserted = 0u64; + let mut updated = 0u64; + for (idx, record) in env.records.iter().enumerate() { + if record.commit_sha() != env.commit.sha { + return Err(IngestError::Record { + index: idx, + message: format!( + "record commit_sha {:?} does not match envelope commit.sha {:?}", + record.commit_sha(), + env.commit.sha, + ), + } + .into()); + } + match apply_record(&tx, record) { + Ok(was_update) => { + if was_update { + updated += 1; + } else { + inserted += 1; + } + } + Err(RecordError::Validation(msg)) => { + return Err(IngestError::Record { + index: idx, + message: msg, + } + .into()); + } + Err(RecordError::Internal(err)) => { + return Err(err.context(format!("applying record at index {idx}"))); + } + } + } + + tx.commit().context("commit transaction")?; + + Ok(IngestResponse { inserted, updated }) +} + +fn upsert_commit(tx: &duckdb::Transaction<'_>, c: &CommitInfo) -> Result<()> { + tx.execute( + r#" + INSERT INTO commits ( + commit_sha, timestamp, message, author_name, author_email, + committer_name, committer_email, tree_sha, url + ) VALUES (?, CAST(? AS TIMESTAMPTZ), ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (commit_sha) DO UPDATE SET + timestamp = excluded.timestamp, + message = excluded.message, + author_name = excluded.author_name, + author_email = excluded.author_email, + committer_name = excluded.committer_name, + committer_email = excluded.committer_email, + tree_sha = excluded.tree_sha, + url = excluded.url + "#, + params![ + c.sha, + c.timestamp, + c.message, + c.author_name, + c.author_email, + c.committer_name, + c.committer_email, + c.tree_sha, + c.url, + ], + )?; + Ok(()) +} + +/// Per-record error split: validation failures carry a message that the +/// caller turns into an [`IngestError::Record`] with the right index; +/// anything else bubbles up as a 500. 
+enum RecordError { + Validation(String), + Internal(anyhow::Error), +} + +impl From for RecordError { + fn from(err: anyhow::Error) -> Self { + Self::Internal(err) + } +} + +impl From for RecordError { + fn from(err: duckdb::Error) -> Self { + Self::Internal(err.into()) + } +} + +fn apply_record(tx: &duckdb::Transaction<'_>, record: &Record) -> Result { + match record { + Record::QueryMeasurement(r) => insert_query_measurement(tx, r), + Record::CompressionTime(r) => { + let mid = measurement_id_compression_time(r); + let was_update = exists(tx, "compression_times", mid)?; + tx.execute( + r#" + INSERT INTO compression_times ( + measurement_id, commit_sha, dataset, dataset_variant, + format, op, value_ns, all_runtimes_ns, env_triple + ) VALUES (?, ?, ?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?) + ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.dataset_variant, + r.format, + r.op, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.env_triple, + ], + )?; + Ok(was_update) + } + Record::CompressionSize(r) => { + let mid = measurement_id_compression_size(r); + let was_update = exists(tx, "compression_sizes", mid)?; + tx.execute( + r#" + INSERT INTO compression_sizes ( + measurement_id, commit_sha, dataset, dataset_variant, + format, value_bytes + ) VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_bytes = excluded.value_bytes + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.dataset_variant, + r.format, + r.value_bytes, + ], + )?; + Ok(was_update) + } + Record::RandomAccessTime(r) => { + let mid = measurement_id_random_access(r); + let was_update = exists(tx, "random_access_times", mid)?; + tx.execute( + r#" + INSERT INTO random_access_times ( + measurement_id, commit_sha, dataset, format, + value_ns, all_runtimes_ns, env_triple + ) VALUES (?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?) + ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.format, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.env_triple, + ], + )?; + Ok(was_update) + } + Record::VectorSearchRun(r) => insert_vector_search(tx, r), + } +} + +fn insert_query_measurement( + tx: &duckdb::Transaction<'_>, + r: &QueryMeasurement, +) -> Result { + if !matches!(r.storage.as_str(), "nvme" | "s3") { + return Err(RecordError::Validation(format!( + "storage must be 'nvme' or 's3', got {:?}", + r.storage + ))); + } + if !memory_quartet_consistent(r) { + return Err(RecordError::Validation( + "memory fields must be populated together (all four or none)".into(), + )); + } + let mid = measurement_id_query(r); + let was_update = exists(tx, "query_measurements", mid)?; + tx.execute( + r#" + INSERT INTO query_measurements ( + measurement_id, commit_sha, dataset, dataset_variant, scale_factor, + query_idx, storage, engine, format, + value_ns, all_runtimes_ns, + peak_physical, peak_virtual, physical_delta, virtual_delta, + env_triple + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?, ?, ?, ?, ?) 
+ ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + peak_physical = excluded.peak_physical, + peak_virtual = excluded.peak_virtual, + physical_delta = excluded.physical_delta, + virtual_delta = excluded.virtual_delta, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.dataset_variant, + r.scale_factor, + r.query_idx, + r.storage, + r.engine, + r.format, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.peak_physical, + r.peak_virtual, + r.physical_delta, + r.virtual_delta, + r.env_triple, + ], + )?; + Ok(was_update) +} + +fn insert_vector_search( + tx: &duckdb::Transaction<'_>, + r: &VectorSearchRun, +) -> Result { + let mid = measurement_id_vector_search(r); + let was_update = exists(tx, "vector_search_runs", mid)?; + tx.execute( + r#" + INSERT INTO vector_search_runs ( + measurement_id, commit_sha, dataset, layout, flavor, threshold, + value_ns, all_runtimes_ns, matches, rows_scanned, bytes_scanned, + iterations, env_triple + ) VALUES (?, ?, ?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?, ?, ?, ?, ?) + ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + matches = excluded.matches, + rows_scanned = excluded.rows_scanned, + bytes_scanned = excluded.bytes_scanned, + iterations = excluded.iterations, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.layout, + r.flavor, + r.threshold, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.matches, + r.rows_scanned, + r.bytes_scanned, + r.iterations, + r.env_triple, + ], + )?; + Ok(was_update) +} + +fn exists(tx: &duckdb::Transaction<'_>, table: &str, mid: i64) -> Result { + // Table name is from a closed enum of literals above, never user input. + let sql = format!("SELECT 1 FROM {table} WHERE measurement_id = ? LIMIT 1"); + let mut stmt = tx.prepare(&sql)?; + let exists = stmt.exists(params![mid])?; + Ok(exists) +} + +fn runtimes_literal(values: &[i64]) -> String { + let mut s = String::with_capacity(values.len() * 8 + 2); + s.push('['); + for (i, v) in values.iter().enumerate() { + if i > 0 { + s.push(','); + } + s.push_str(&v.to_string()); + } + s.push(']'); + s +} + +fn memory_quartet_consistent(r: &QueryMeasurement) -> bool { + let any = r.peak_physical.is_some() + || r.peak_virtual.is_some() + || r.physical_delta.is_some() + || r.virtual_delta.is_some(); + let all = r.peak_physical.is_some() + && r.peak_virtual.is_some() + && r.physical_delta.is_some() + && r.virtual_delta.is_some(); + !any || all +} diff --git a/benchmarks-website/server/src/lib.rs b/benchmarks-website/server/src/lib.rs new file mode 100644 index 00000000000..ae0a19b6cb9 --- /dev/null +++ b/benchmarks-website/server/src/lib.rs @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Vortex benchmarks website v3 (alpha) server. +//! +//! This crate is a leaf binary that owns a DuckDB file on local disk, +//! accepts authenticated `/api/ingest` POSTs, and serves a small read API +//! plus the HTML pages contributed by the web-ui component. 
+ +pub mod api; +pub mod app; +pub mod auth; +pub mod db; +pub mod error; +pub mod html; +pub mod ingest; +pub mod records; +pub mod schema; +pub mod slug; diff --git a/benchmarks-website/server/src/main.rs b/benchmarks-website/server/src/main.rs new file mode 100644 index 00000000000..93768fdbfca --- /dev/null +++ b/benchmarks-website/server/src/main.rs @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Binary entrypoint for the bench.vortex.dev v3 alpha server. + +use std::env; +use std::path::PathBuf; + +use anyhow::Context as _; +use anyhow::Result; +use tracing_subscriber::EnvFilter; + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_env("VORTEX_BENCH_LOG").unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let db_path: PathBuf = env::var("VORTEX_BENCH_DB") + .unwrap_or_else(|_| "bench.duckdb".to_string()) + .into(); + let bearer_token = + env::var("INGEST_BEARER_TOKEN").context("INGEST_BEARER_TOKEN env var must be set")?; + let bind_addr = env::var("VORTEX_BENCH_BIND").unwrap_or_else(|_| "127.0.0.1:3000".to_string()); + + let state = vortex_bench_server::app::AppState::open(&db_path, bearer_token) + .with_context(|| format!("opening DuckDB at {}", db_path.display()))?; + let app = vortex_bench_server::app::router(state); + + let listener = tokio::net::TcpListener::bind(&bind_addr) + .await + .with_context(|| format!("binding to {bind_addr}"))?; + tracing::info!( + addr = %listener.local_addr()?, + db = %db_path.display(), + "bench server listening" + ); + axum::serve(listener, app).await?; + Ok(()) +} diff --git a/benchmarks-website/server/src/records.rs b/benchmarks-website/server/src/records.rs new file mode 100644 index 00000000000..0217675cdea --- /dev/null +++ b/benchmarks-website/server/src/records.rs @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Wire shapes for `POST /api/ingest`. +//! +//! These types deserialize the ingest envelope defined in +//! `benchmarks-website/planning/02-contracts.md`. Each variant of [`Record`] +//! is gated by `#[serde(deny_unknown_fields)]`, so unknown fields produce +//! a 400 with the offending record's index. + +use serde::Deserialize; + +/// One ingest payload. +/// +/// `run_meta` and `commit` are added by the post-ingest script around the +/// JSONL of bare records the Rust emitter writes. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Envelope { + pub run_meta: RunMeta, + pub commit: CommitInfo, + pub records: Vec, +} + +/// Run-level metadata. `schema_version` is checked against +/// [`crate::schema::SCHEMA_VERSION`] before any record is processed. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct RunMeta { + pub benchmark_id: String, + pub schema_version: i32, + pub started_at: String, +} + +/// Columns for the `commits` dim table. The wire field for `commit_sha` is +/// renamed to `sha` per the contract. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CommitInfo { + pub sha: String, + pub timestamp: String, + pub message: String, + pub author_name: String, + pub author_email: String, + pub committer_name: String, + pub committer_email: String, + pub tree_sha: String, + pub url: String, +} + +/// A single ingest record, discriminated by `kind`. 
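+// For example, an illustrative wire line such as
+//   {"kind":"random_access_time","commit_sha":"<40-hex>","dataset":"taxi",
+//    "format":"parquet","value_ns":1200,"all_runtimes_ns":[1100,1200,1300]}
+// selects `Record::RandomAccessTime`; an unknown `kind`, or an unexpected
+// field inside a known kind, fails deserialization and the whole envelope
+// is rejected with a 400.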
+#[derive(Debug, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum Record { + QueryMeasurement(QueryMeasurement), + CompressionTime(CompressionTime), + CompressionSize(CompressionSize), + RandomAccessTime(RandomAccessTime), + VectorSearchRun(VectorSearchRun), +} + +/// SQL query suite measurement (TPC-H, ClickBench, ...). +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct QueryMeasurement { + pub commit_sha: String, + pub dataset: String, + #[serde(default)] + pub dataset_variant: Option, + #[serde(default)] + pub scale_factor: Option, + pub query_idx: i32, + pub storage: String, + pub engine: String, + pub format: String, + pub value_ns: i64, + pub all_runtimes_ns: Vec, + #[serde(default)] + pub peak_physical: Option, + #[serde(default)] + pub peak_virtual: Option, + #[serde(default)] + pub physical_delta: Option, + #[serde(default)] + pub virtual_delta: Option, + #[serde(default)] + pub env_triple: Option, +} + +/// Encode/decode timing from `compress-bench`. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CompressionTime { + pub commit_sha: String, + pub dataset: String, + #[serde(default)] + pub dataset_variant: Option, + pub format: String, + pub op: String, + pub value_ns: i64, + pub all_runtimes_ns: Vec, + #[serde(default)] + pub env_triple: Option, +} + +/// On-disk size from `compress-bench`. One-shot, no per-iteration data. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct CompressionSize { + pub commit_sha: String, + pub dataset: String, + #[serde(default)] + pub dataset_variant: Option, + pub format: String, + pub value_bytes: i64, +} + +/// Take-time timing from `random-access-bench`. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct RandomAccessTime { + pub commit_sha: String, + pub dataset: String, + pub format: String, + pub value_ns: i64, + pub all_runtimes_ns: Vec, + #[serde(default)] + pub env_triple: Option, +} + +/// Cosine-similarity scan from `vector-search-bench`. +#[derive(Debug, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct VectorSearchRun { + pub commit_sha: String, + pub dataset: String, + pub layout: String, + pub flavor: String, + pub threshold: f64, + pub value_ns: i64, + pub all_runtimes_ns: Vec, + pub matches: i64, + pub rows_scanned: i64, + pub bytes_scanned: i64, + pub iterations: i32, + #[serde(default)] + pub env_triple: Option, +} + +impl Record { + /// The `commit_sha` referenced by this record. Every record carries one; + /// the server checks the envelope's `commit.sha` matches. + pub fn commit_sha(&self) -> &str { + match self { + Self::QueryMeasurement(r) => &r.commit_sha, + Self::CompressionTime(r) => &r.commit_sha, + Self::CompressionSize(r) => &r.commit_sha, + Self::RandomAccessTime(r) => &r.commit_sha, + Self::VectorSearchRun(r) => &r.commit_sha, + } + } + + /// The wire `kind` string. Useful for logging and error messages. 
+ pub fn kind(&self) -> &'static str { + match self { + Self::QueryMeasurement(_) => "query_measurement", + Self::CompressionTime(_) => "compression_time", + Self::CompressionSize(_) => "compression_size", + Self::RandomAccessTime(_) => "random_access_time", + Self::VectorSearchRun(_) => "vector_search_run", + } + } +} diff --git a/benchmarks-website/server/src/schema.rs b/benchmarks-website/server/src/schema.rs new file mode 100644 index 00000000000..be00f86eac6 --- /dev/null +++ b/benchmarks-website/server/src/schema.rs @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! DuckDB schema DDL applied on server boot. +//! +//! See `benchmarks-website/planning/01-schema.md` for the column contracts. +//! There is no migration framework at alpha: if the schema changes, delete +//! the DuckDB file and restart. + +/// DDL for the `commits` dim plus the five fact tables. +pub const SCHEMA_DDL: &str = r#" +CREATE TABLE IF NOT EXISTS commits ( + commit_sha TEXT PRIMARY KEY NOT NULL, + timestamp TIMESTAMPTZ NOT NULL, + message TEXT, + author_name TEXT, + author_email TEXT, + committer_name TEXT, + committer_email TEXT, + tree_sha TEXT NOT NULL, + url TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS query_measurements ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + dataset_variant TEXT, + scale_factor TEXT, + query_idx INTEGER NOT NULL, + storage TEXT NOT NULL, + engine TEXT NOT NULL, + format TEXT NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + peak_physical BIGINT, + peak_virtual BIGINT, + physical_delta BIGINT, + virtual_delta BIGINT, + env_triple TEXT +); + +CREATE TABLE IF NOT EXISTS compression_times ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + dataset_variant TEXT, + format TEXT NOT NULL, + op TEXT NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + env_triple TEXT +); + +CREATE TABLE IF NOT EXISTS compression_sizes ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + dataset_variant TEXT, + format TEXT NOT NULL, + value_bytes BIGINT NOT NULL +); + +CREATE TABLE IF NOT EXISTS random_access_times ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + format TEXT NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + env_triple TEXT +); + +CREATE TABLE IF NOT EXISTS vector_search_runs ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + layout TEXT NOT NULL, + flavor TEXT NOT NULL, + threshold DOUBLE NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + matches BIGINT NOT NULL, + rows_scanned BIGINT NOT NULL, + bytes_scanned BIGINT NOT NULL, + iterations INTEGER NOT NULL, + env_triple TEXT +); +"#; + +/// Schema version expected by the server. The ingest envelope's +/// `run_meta.schema_version` must match this exactly at alpha. +pub const SCHEMA_VERSION: i32 = 1; diff --git a/benchmarks-website/server/src/slug.rs b/benchmarks-website/server/src/slug.rs new file mode 100644 index 00000000000..911a6719484 --- /dev/null +++ b/benchmarks-website/server/src/slug.rs @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Opaque slugs for `/api/chart/:slug`. +//! +//! 
Per `02-contracts.md`, the web-ui treats slugs as opaque strings: it +//! receives them from `/api/groups` and feeds them back unchanged to +//! `/api/chart/:slug`. The server is free to choose any format. +//! +//! Slugs here are `.` where `` names the +//! source fact table and the JSON encodes the chart key. Round-tripping the +//! slug back gives a strongly-typed [`ChartKey`]. + +use anyhow::Context as _; +use anyhow::Result; +use anyhow::anyhow; +use base64::Engine as _; +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use serde::Deserialize; +use serde::Serialize; + +const PREFIX_QUERY: &str = "qm"; +const PREFIX_COMPRESSION_TIME: &str = "ct"; +const PREFIX_COMPRESSION_SIZE: &str = "cs"; +const PREFIX_RANDOM_ACCESS: &str = "rat"; +const PREFIX_VECTOR_SEARCH: &str = "vsr"; + +const PREFIX_QUERY_GROUP: &str = "qmg"; +const PREFIX_COMPRESSION_TIME_GROUP: &str = "ctg"; +const PREFIX_COMPRESSION_SIZE_GROUP: &str = "csg"; +const PREFIX_RANDOM_ACCESS_GROUP: &str = "rag"; +const PREFIX_VECTOR_SEARCH_GROUP: &str = "vsg"; + +/// The strongly-typed chart key parsed from a slug. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "k")] +pub enum ChartKey { + /// `query_measurements` chart: `(dataset, query_idx)` per `01-schema.md`. + /// Group context (`dataset_variant`, `scale_factor`, `storage`) is carried + /// alongside so the slug fully specifies the chart. + QueryMeasurement { + dataset: String, + dataset_variant: Option, + scale_factor: Option, + storage: String, + query_idx: i32, + }, + /// `compression_times` chart: `(dataset, dataset_variant)`. + CompressionTime { + dataset: String, + dataset_variant: Option, + }, + /// `compression_sizes` chart: `(dataset, dataset_variant)`. + CompressionSize { + dataset: String, + dataset_variant: Option, + }, + /// `random_access_times` chart: `dataset`. + RandomAccess { dataset: String }, + /// `vector_search_runs` chart: `(dataset, layout, threshold)`. + VectorSearch { + dataset: String, + layout: String, + threshold: f64, + }, +} + +impl ChartKey { + fn prefix(&self) -> &'static str { + match self { + Self::QueryMeasurement { .. } => PREFIX_QUERY, + Self::CompressionTime { .. } => PREFIX_COMPRESSION_TIME, + Self::CompressionSize { .. } => PREFIX_COMPRESSION_SIZE, + Self::RandomAccess { .. } => PREFIX_RANDOM_ACCESS, + Self::VectorSearch { .. } => PREFIX_VECTOR_SEARCH, + } + } + + /// Render the slug for this chart key. + pub fn to_slug(&self) -> String { + let json = serde_json::to_vec(self).expect("ChartKey is always JSON-serializable"); + format!("{}.{}", self.prefix(), URL_SAFE_NO_PAD.encode(json)) + } + + /// Parse a slug previously produced by [`Self::to_slug`]. + pub fn from_slug(slug: &str) -> Result { + let (_, encoded) = slug + .split_once('.') + .ok_or_else(|| anyhow!("slug missing '.' separator"))?; + let json = URL_SAFE_NO_PAD + .decode(encoded.as_bytes()) + .context("slug payload was not valid base64url")?; + let key: Self = serde_json::from_slice(&json).context("slug payload was not valid JSON")?; + Ok(key) + } +} + +/// Slug for a *group* of charts. Mirrors [`ChartKey`] but at the group +/// granularity: a group is a set of charts that share every dimension except +/// one (e.g. all 22 TPC-H queries at sf=1 on nvme). +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "k")] +pub enum GroupKey { + /// All `query_measurements` charts at one `(dataset, dataset_variant, + /// scale_factor, storage)`. Charts inside vary by `query_idx`. 
+ QueryGroup { + dataset: String, + dataset_variant: Option, + scale_factor: Option, + storage: String, + }, + /// Every compression-time chart (one per `(dataset, dataset_variant)`). + CompressionTimeGroup, + /// Every compression-size chart. + CompressionSizeGroup, + /// Every random-access chart. + RandomAccessGroup, + /// All vector-search charts at one `(dataset, layout)`. Charts inside + /// vary by `threshold`. + VectorSearchGroup { dataset: String, layout: String }, +} + +impl GroupKey { + fn prefix(&self) -> &'static str { + match self { + Self::QueryGroup { .. } => PREFIX_QUERY_GROUP, + Self::CompressionTimeGroup => PREFIX_COMPRESSION_TIME_GROUP, + Self::CompressionSizeGroup => PREFIX_COMPRESSION_SIZE_GROUP, + Self::RandomAccessGroup => PREFIX_RANDOM_ACCESS_GROUP, + Self::VectorSearchGroup { .. } => PREFIX_VECTOR_SEARCH_GROUP, + } + } + + /// Render the slug for this group key. + pub fn to_slug(&self) -> String { + let json = serde_json::to_vec(self).expect("GroupKey is always JSON-serializable"); + format!("{}.{}", self.prefix(), URL_SAFE_NO_PAD.encode(json)) + } + + /// Parse a slug previously produced by [`Self::to_slug`]. + pub fn from_slug(slug: &str) -> Result { + let (_, encoded) = slug + .split_once('.') + .ok_or_else(|| anyhow!("slug missing '.' separator"))?; + let json = URL_SAFE_NO_PAD + .decode(encoded.as_bytes()) + .context("slug payload was not valid base64url")?; + let key: Self = serde_json::from_slice(&json).context("slug payload was not valid JSON")?; + Ok(key) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn roundtrip(key: ChartKey) { + let slug = key.to_slug(); + let parsed = ChartKey::from_slug(&slug).expect("parses back"); + assert_eq!(parsed, key); + } + + #[test] + fn query_measurement_roundtrips() { + roundtrip(ChartKey::QueryMeasurement { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("1".into()), + storage: "nvme".into(), + query_idx: 7, + }); + } + + #[test] + fn vector_search_roundtrips() { + roundtrip(ChartKey::VectorSearch { + dataset: "cohere-large-10m".into(), + layout: "partitioned".into(), + threshold: 0.75, + }); + } + + #[test] + fn random_access_roundtrips() { + roundtrip(ChartKey::RandomAccess { + dataset: "taxi".into(), + }); + } + + #[test] + fn malformed_slug_rejected() { + assert!(ChartKey::from_slug("not-a-slug").is_err()); + assert!(ChartKey::from_slug("qm.****").is_err()); + } + + fn roundtrip_group(key: GroupKey) { + let slug = key.to_slug(); + let parsed = GroupKey::from_slug(&slug).expect("parses back"); + assert_eq!(parsed, key); + } + + #[test] + fn group_keys_roundtrip() { + roundtrip_group(GroupKey::QueryGroup { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("1".into()), + storage: "nvme".into(), + }); + roundtrip_group(GroupKey::CompressionTimeGroup); + roundtrip_group(GroupKey::CompressionSizeGroup); + roundtrip_group(GroupKey::RandomAccessGroup); + roundtrip_group(GroupKey::VectorSearchGroup { + dataset: "cohere".into(), + layout: "partitioned".into(), + }); + } + + #[test] + fn group_slug_prefix_distinct_from_chart() { + let chart = ChartKey::CompressionTime { + dataset: "tpch".into(), + dataset_variant: None, + } + .to_slug(); + let group = GroupKey::CompressionTimeGroup.to_slug(); + let chart_prefix = chart.split_once('.').unwrap().0; + let group_prefix = group.split_once('.').unwrap().0; + assert_ne!(chart_prefix, group_prefix); + } + + #[test] + fn malformed_group_slug_rejected() { + assert!(GroupKey::from_slug("not-a-slug").is_err()); + 
assert!(GroupKey::from_slug("qmg.****").is_err()); + } +} diff --git a/benchmarks-website/server/static/CHARTJS_PLUGIN_ZOOM_LICENSE.md b/benchmarks-website/server/static/CHARTJS_PLUGIN_ZOOM_LICENSE.md new file mode 100644 index 00000000000..5d0f4288b27 --- /dev/null +++ b/benchmarks-website/server/static/CHARTJS_PLUGIN_ZOOM_LICENSE.md @@ -0,0 +1,9 @@ +The MIT License (MIT) + +Copyright (c) 2013-2021 chartjs-plugin-zoom contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/benchmarks-website/server/static/CHART_JS_LICENSE.md b/benchmarks-website/server/static/CHART_JS_LICENSE.md new file mode 100644 index 00000000000..f216610fd7e --- /dev/null +++ b/benchmarks-website/server/static/CHART_JS_LICENSE.md @@ -0,0 +1,9 @@ +The MIT License (MIT) + +Copyright (c) 2014-2024 Chart.js Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/benchmarks-website/server/static/chart-init.js b/benchmarks-website/server/static/chart-init.js new file mode 100644 index 00000000000..3b5e616c3e9 --- /dev/null +++ b/benchmarks-website/server/static/chart-init.js @@ -0,0 +1,1587 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +// Hydrate Chart.js charts on /, /chart/:slug, and /group/:slug, plus the +// lazy-fetch-on-toggle behaviour for closed `
` groups. +// +// Per-chart UX: +// - Each `.chart-card` carries `data-chart-slug`. The card *owns* its own +// toolbar (`.toolbar--card`) — there is no page-level toolbar. +// - Each chart fetches the **entire raw history** once (`?n=all`). The +// server does no downsampling; we keep the raw payload on the canvas +// and re-derive what Chart.js renders on every scope/pan/zoom change. +// - `rebuildVisibleAndUpdate` is the single source of truth for the +// rendered point count. The cap is one constant: at most +// `MAX_VISIBLE_POINTS` *unique commit indices* (x-positions) are +// rendered, **shared across every series**. Below the cap we render +// every commit that has data; above it we LTTB the per-commit +// "max-y across series" to pick that many representatives, then +// every series renders at those shared indices. This is what the +// cap is *supposed* to mean: visually, the chart never has more +// than that many x-axis columns regardless of how many lines are +// on it. (Earlier per-series LTTB picked different peaks for each +// series and the union of x-positions blew past the cap.) +// - The slider is throttled to ~16ms (one frame at 60fps) per v2's +// `CONFIG.ZOOM_THROTTLE_DELAY` so dragging the slider feels continuous. +// - Mouse wheel pans horizontally (chartjs-plugin-zoom does not expose +// pan-on-wheel, so a manual `wheel` listener calls `chart.pan(...)`). +// - Drag-pan + drag-rectangle-zoom are wired through the plugin and +// trigger the same `rebuildVisibleAndUpdate` via `onPan`/`onZoom`. +// - A custom inline plugin draws a vertical crosshair at the hovered +// commit; the external tooltip is offset and `pointer-events: none` +// to fix the flicker described in the per-chart UX rebuild brief. +(function () { + "use strict"; + + // ----------------------------------------------------------------------- + // Constants + // ----------------------------------------------------------------------- + var ZOOM_THROTTLE_MS = 16; // one frame at ~60fps for slider drag + var PAN_THROTTLE_MS = 50; // pan/zoom throttle — looser than slider + var FETCH_N = "all"; // lazy-fetch the entire raw history + var DEFAULT_VISIBLE = 100; // initial visible window (last 100 of fetched) + // Mirror of `LANDING_INLINE_N` in `server/src/html.rs`. The first group's + // inline JSON is capped at this many commits to keep the cold landing + // page small. When the user zooms wider than what's inlined we lazy-fetch + // `?n=all` and replace the payload in place. If you change this, update + // the server too — the comparison `commits.length >= LANDING_INLINE_N` + // is what tells us the inline payload was potentially trimmed. + var LANDING_INLINE_N = 100; + // Hard cap on how many points a single series can render at once. When + // the visible commit range has more raw non-null points than this, we + // LTTB-downsample to exactly this number; below it we render raw. So + // the user always sees at most this many points per series, regardless + // of how far they zoom out, and the rule is one sentence: + // + // visible <= MAX_VISIBLE_POINTS → raw + // visible > MAX_VISIBLE_POINTS → LTTB to MAX_VISIBLE_POINTS + // + // Chart cards are ~600–900px on desktop and Chart.js draws ~2px point + // markers, so 500 points gives roughly 1.5px of horizontal space per + // point — about as dense as the eye can resolve. Bumping higher costs + // render time without visible improvement; lowering loses detail on + // wide cards. 
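+  // Worked example (numbers illustrative): a visible window of 2,000 commits
+  // renders exactly 500 LTTB-picked points per series; a window of 300
+  // renders all 300 raw.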
+ var MAX_VISIBLE_POINTS = 500; + + // ----------------------------------------------------------------------- + // Global filter state (engine/format chips inside the navbar dropdown). + // + // Model: + // `globalFilter.engines` / `.formats` track the *active* (visible) set + // for that dimension. The chip's displayed active state mirrors + // visibility — every chip active means no filter is applied, exactly + // one chip inactive hides only that engine/format, and so on. The + // URL `?engine=`/`?format=` stay as allowlists for stability across + // refreshes; we omit the param when every chip is active (i.e. the + // active set equals the universe), so the no-filter URL is clean. + // + // Per-card overrides: + // Clicking a chart's legend toggles `dataset.hidden` and adds the label + // to that card's `canvas.__bench_overrides` set. The global apply pass + // skips overridden labels, so the user's manual call sticks even after + // subsequent global filter changes. + // ----------------------------------------------------------------------- + var globalFilter = readFilterState(); + var filterUniverse = readFilterUniverseFromDom(); + // `seedFromUrl` translates the URL state (allowlist) into the active set. + // Empty allowlist in the URL is treated as "no filter" → every chip + // active. Non-empty is taken verbatim, even if a chip has since been + // added or removed from the universe — keeps stale URLs deterministic. + seedActiveFromUrlState(); + + function readFilterState() { + var fallback = { engines: [], formats: [] }; + var node = document.getElementById("bench-filter-state"); + if (!node) return fallback; + try { + var parsed = JSON.parse(node.textContent); + return { + engines: Array.isArray(parsed.engines) ? parsed.engines.slice() : [], + formats: Array.isArray(parsed.formats) ? parsed.formats.slice() : [], + }; + } catch (e) { + return fallback; + } + } + + // Pull the chip universe straight from the rendered panel, so the JS + // doesn't have to mirror the server's enum. If the dropdown isn't on the + // page (shouldn't happen — the header always renders it when there's + // data) we fall back to whatever is in the URL state. + function readFilterUniverseFromDom() { + var u = { engines: [], formats: [] }; + document.querySelectorAll( + '[data-role="filter-panel"] .filter-chip[data-value]:not([data-value="*"])', + ).forEach(function (chip) { + var dim = chip.getAttribute("data-filter"); + var value = chip.getAttribute("data-value"); + if (!dim || !value) return; + var bucket = dim === "engine" ? u.engines : u.formats; + if (bucket.indexOf(value) === -1) bucket.push(value); + }); + return u; + } + + function seedActiveFromUrlState() { + if (!globalFilter.engines.length) { + globalFilter.engines = filterUniverse.engines.slice(); + } + if (!globalFilter.formats.length) { + globalFilter.formats = filterUniverse.formats.slice(); + } + } + + // Any-of-universe-missing-from-active means the dimension is filtered. + function dimensionIsFiltered(key) { + return globalFilter[key].length < filterUniverse[key].length; + } + + // A series is hidden when its engine/format dimension is filtered AND its + // tag isn't in the active set. Series without an engine tag (e.g. + // compression-time `format:op` series) are unaffected by the engine + // filter — symmetric for format. This keeps the chip semantics intuitive: + // hiding an engine doesn't nuke charts that have no engine dimension. 
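+  // Example with illustrative values: engine universe {duckdb, datafusion,
+  // vortex}, active set {duckdb, datafusion} → a series tagged
+  // engine:"vortex" is hidden, engine:"duckdb" stays visible, and a series
+  // with no engine tag at all (e.g. a compression-time format:op series) is
+  // untouched by the engine filter.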
+ function seriesPassesFilter(meta) { + if (!meta) meta = {}; + if (meta.engine && dimensionIsFiltered("engines") + && globalFilter.engines.indexOf(meta.engine) === -1) { + return false; + } + if (meta.format && dimensionIsFiltered("formats") + && globalFilter.formats.indexOf(meta.format) === -1) { + return false; + } + return true; + } + + // ----------------------------------------------------------------------- + // Palette + helpers + // ----------------------------------------------------------------------- + var palette = [ + "#2563eb", "#dc2626", "#16a34a", "#ea580c", "#7c3aed", + "#0891b2", "#ca8a04", "#db2777", "#65a30d", "#475569", + ]; + + function colorFor(i) { return palette[i % palette.length]; } + + function shortSha(sha) { + return typeof sha === "string" ? sha.slice(0, 7) : String(sha); + } + + function shortDate(ts) { + if (typeof ts !== "string") return ""; + return ts.slice(0, 10); + } + + function truncate(s, max) { + if (typeof s !== "string") return ""; + return s.length > max ? s.slice(0, max - 1) + "…" : s; + } + + function firstLine(s) { + if (typeof s !== "string") return ""; + var nl = s.indexOf("\n"); + return nl >= 0 ? s.slice(0, nl) : s; + } + + // Vortex commits to `develop` are squash-merged from PRs; the squash subject + // ends with `(#NNNN)`. Returning just the number lets callers build either a + // PR or commit URL. + function parsePrNumber(message) { + if (typeof message !== "string") return null; + var m = message.match(/\(#(\d+)\)/); + return m ? m[1] : null; + } + + function escapeHtml(s) { + return String(s) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + } + + function formatNumber(v, unit) { + if (v === null || v === undefined || Number.isNaN(v)) return "—"; + if (unit === "ns") { + var abs = Math.abs(v); + if (abs >= 1e9) return (v / 1e9).toFixed(2) + " s"; + if (abs >= 1e6) return (v / 1e6).toFixed(2) + " ms"; + if (abs >= 1e3) return (v / 1e3).toFixed(2) + " µs"; + return v.toFixed(0) + " ns"; + } + if (unit === "bytes") { + var a = Math.abs(v); + if (a >= 1024 * 1024 * 1024) return (v / (1024 * 1024 * 1024)).toFixed(2) + " GiB"; + if (a >= 1024 * 1024) return (v / (1024 * 1024)).toFixed(2) + " MiB"; + if (a >= 1024) return (v / 1024).toFixed(2) + " KiB"; + return v.toFixed(0) + " B"; + } + return v.toString(); + } + + // Throttle to a max call rate; trailing call is preserved so the final + // slider position is honoured. (`requestAnimationFrame` is conceptually + // similar but we want a hard ceiling regardless of when the browser + // schedules a frame.) + function throttle(fn, ms) { + var lastRan = 0; + var pending = null; + var pendingArgs = null; + return function () { + var now = Date.now(); + pendingArgs = arguments; + if (now - lastRan >= ms) { + lastRan = now; + fn.apply(null, pendingArgs); + } else if (!pending) { + var wait = ms - (now - lastRan); + pending = setTimeout(function () { + lastRan = Date.now(); + pending = null; + fn.apply(null, pendingArgs); + }, wait); + } + }; + } + + // ----------------------------------------------------------------------- + // LTTB (Largest-Triangle-Three-Buckets) downsampler. + // + // Returns the indices into `xs` / `ys` to keep, including index 0 and + // `n - 1`. `xs` must be strictly increasing. When `threshold >= n` or + // `threshold < 3`, returns `[0, 1, ..., n-1]` unchanged. + // + // Algorithm: . Per-bucket pick the + // point that forms the largest triangle with the previously kept point + // and the average of the next bucket. 
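+  //
+  // Contract example: lttbIndices(xs, ys, 500) over 2,000 points returns
+  // exactly 500 strictly increasing indices that always include 0 and 1999;
+  // with only 400 points (threshold >= n) it returns [0, 1, ..., 399]
+  // unchanged.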
+ // ----------------------------------------------------------------------- + function lttbIndices(xs, ys, threshold) { + var n = xs.length; + if (threshold >= n || threshold < 3) { + var all = new Array(n); + for (var i = 0; i < n; i++) all[i] = i; + return all; + } + var out = new Array(threshold); + out[0] = 0; + var bucket = (n - 2) / (threshold - 2); + var a = 0; + for (var bi = 0; bi < threshold - 2; bi++) { + // Average of the *next* bucket — the "C" point in the triangle. + var nextStart = Math.floor((bi + 1) * bucket) + 1; + var nextEnd = Math.min(n, Math.floor((bi + 2) * bucket) + 1); + var count = Math.max(1, nextEnd - nextStart); + var ax = 0, ay = 0; + for (var j = nextStart; j < nextEnd; j++) { ax += xs[j]; ay += ys[j]; } + ax /= count; ay /= count; + + // Search this bucket for the point with the largest triangle area + // against (a, avg_next). + var rangeStart = Math.floor(bi * bucket) + 1; + var rangeEnd = Math.floor((bi + 1) * bucket) + 1; + var pax = xs[a], pay = ys[a]; + var maxArea = -1; + var maxIdx = rangeStart; + for (var k = rangeStart; k < rangeEnd; k++) { + var area = Math.abs((pax - ax) * (ys[k] - pay) - (pax - xs[k]) * (ay - pay)) * 0.5; + if (area > maxArea) { maxArea = area; maxIdx = k; } + } + out[bi + 1] = maxIdx; + a = maxIdx; + } + out[threshold - 1] = n - 1; + return out; + } + + // ----------------------------------------------------------------------- + // Crosshair plugin: draws a vertical line at the chart's active hover + // index. Using an inline plugin is cheaper than pulling in + // chartjs-plugin-crosshair, which is overkill for this one feature. + // ----------------------------------------------------------------------- + var crosshairPlugin = { + id: "benchCrosshair", + afterDatasetsDraw: function (chart) { + var active = chart.tooltip && chart.tooltip.getActiveElements + ? chart.tooltip.getActiveElements() + : []; + if (!active || !active.length) return; + var x = active[0].element.x; + var ya = chart.scales && chart.scales.y; + if (!ya || !Number.isFinite(x)) return; + var ctx = chart.ctx; + ctx.save(); + // `--muted` from the page theme — read it lazily so dark mode picks + // up the right colour. + var muted = getComputedStyle(document.documentElement) + .getPropertyValue("--muted").trim() || "#9ca3af"; + ctx.strokeStyle = muted; + ctx.lineWidth = 1; + ctx.setLineDash([4, 4]); + ctx.beginPath(); + ctx.moveTo(x, ya.top); + ctx.lineTo(x, ya.bottom); + ctx.stroke(); + ctx.restore(); + }, + }; + + // ----------------------------------------------------------------------- + // External tooltip with offset + flip-on-overflow. + // + // Flicker fix: the tooltip host is **always** `pointer-events: none`. The + // previous implementation flipped it to `auto` when visible; the cursor + // would land on the tooltip, fire mouseout on the canvas, the tooltip + // would hide, the cursor would re-enter the canvas, and the cycle would + // repeat at event-loop frequency. Clicks on a data point are handled by + // the chart's `onClick` (opens the PR or commit URL in a new tab), so the + // tooltip itself never needs to be interactive. 
+ // ----------------------------------------------------------------------- + function externalTooltipHandler(canvas, host) { + return function (context) { + var tt = context.tooltip; + if (!host) return; + if (tt.opacity === 0) { + host.style.opacity = "0"; + return; + } + + var chart = context.chart; + var payload = canvas.__bench_payload || { commits: [], unit: "" }; + var firstDp = tt.dataPoints && tt.dataPoints[0]; + if (!firstDp) { + host.style.opacity = "0"; + return; + } + // Snap to a single commit. We use `mode: "nearest"` on the chart + // options, so `firstDp.dataIndex` is the single closest data point + // to the cursor (skipping nulls in `dataset.data`). If the cursor + // falls between two LTTB-kept points, exactly one wins — no more + // rendering both columns at once. + var idx = firstDp.dataIndex; + var commit = (payload.commits || [])[idx] || {}; + var unit = payload.unit || ""; + var rawLen = (chart.data.labels || []).length; + + // Build one row per dataset, reading values from each series' + // `rawData` (the unmodified payload) so the tooltip shows raw + // measurements even when LTTB has nulled out `dataset.data[idx]`. + // Iterating `chart.data.datasets` directly — instead of mapping + // `tt.dataPoints` — guarantees one row per series at this single + // commit; `tt.dataPoints` could otherwise contain points from + // multiple `dataIndex` values when the cursor sits between two + // closely-packed LTTB columns. + var rowItems = chart.data.datasets.map(function (ds, dsIndex) { + // Skip datasets the user (or filter bar) has hidden. + var meta = chart.getDatasetMeta && chart.getDatasetMeta(dsIndex); + if (meta && meta.hidden) return null; + if (ds.hidden) return null; + var raw = (ds.rawData || [])[idx]; + if (raw === null || raw === undefined || Number.isNaN(raw)) { + return null; + } + // Per-row delta is `(current - previous) / previous`, where + // "previous" is the chronologically preceding commit. The + // `commits[]` array is sorted oldest-first by SQL — index 0 is + // the oldest commit, the last index is the newest — so the + // predecessor lives at `idx - 1`. Walk further back across + // null-valued slots so series that didn't run on every commit + // still get a meaningful baseline. + var prevIdx = idx - 1; + var prevRaw = null; + while (prevIdx >= 0) { + var pv = (ds.rawData || [])[prevIdx]; + if (pv !== null && pv !== undefined && !Number.isNaN(pv)) { + prevRaw = pv; + break; + } + prevIdx--; + } + var deltaHtml = ""; + if (prevRaw !== null && prevRaw !== 0) { + var pct = ((raw - prevRaw) / prevRaw) * 100; + var cls = pct > 0 ? "tt-delta tt-delta--worse" + : pct < 0 ? "tt-delta tt-delta--better" : "tt-delta"; + var sign = pct > 0 ? "+" : ""; + deltaHtml = '' + sign + pct.toFixed(1) + "%"; + } + return { + label: ds.label, + color: ds.borderColor, + raw: raw, + deltaHtml: deltaHtml, + }; + }).filter(Boolean); + + // Top-to-bottom order matches the visual stack of lines at this x. + rowItems.sort(function (a, b) { return b.raw - a.raw; }); + + var rows = rowItems + .map(function (r) { + return '
' + + '' + + '' + escapeHtml(r.label) + '' + + '' + escapeHtml(formatNumber(r.raw, unit)) + '' + + r.deltaHtml + + "
"; + }) + .join(""); + + // If every series was hidden / had no value at this commit, treat + // this as a no-op hover instead of flashing an empty popup. + if (!rows) { + host.style.opacity = "0"; + return; + } + + var titleHtml = '
' + + escapeHtml(shortSha(commit.sha)) + ' · ' + + escapeHtml(shortDate(commit.timestamp)) + + "
"; + + // Show short SHA + first-line commit message, truncated. The full URL + // (or PR URL) is wired up via the chart's onClick handler, so we don't + // render it as text here. + var msg = truncate(firstLine(commit.message || ""), 80); + var footerLine = commit.sha + ? (msg ? escapeHtml(shortSha(commit.sha)) + " · " + escapeHtml(msg) + : escapeHtml(shortSha(commit.sha))) + : escapeHtml(msg); + var footerHtml = footerLine + ? '" + : ""; + + host.innerHTML = titleHtml + '
' + rows + "
" + footerHtml; + + // Position the tooltip relative to its container, offset 12px from + // the cursor. Flip horizontally if it would overflow. + var canvasRect = context.chart.canvas.getBoundingClientRect(); + var hostRect = host.parentNode.getBoundingClientRect(); + var x = canvasRect.left - hostRect.left + tt.caretX; + var y = canvasRect.top - hostRect.top + tt.caretY; + host.style.opacity = "1"; + host.style.left = x + "px"; + host.style.top = y + "px"; + // Measure after content swap so flipping is correct. + var ttWidth = host.offsetWidth || 0; + var containerWidth = host.parentNode.clientWidth || 0; + var flip = (x + ttWidth + 24) > containerWidth; + host.style.transform = flip + ? "translate(calc(-100% - 12px), 12px)" + : "translate(12px, 12px)"; + }; + } + + // ----------------------------------------------------------------------- + // Payload + datasets + // ----------------------------------------------------------------------- + function readInlinePayload(idx) { + var s = document.getElementById("chart-data-" + idx); + if (!s) return null; + try { return JSON.parse(s.textContent); } catch (e) { return null; } + } + + // Build the per-series dataset shells. `data` starts as a full-length + // null-padded array; `rebuildVisibleAndUpdate` fills it in based on the + // current visible range. `rawData` holds a reference to the original + // payload so the tooltip can show raw values regardless of LTTB. + function buildDatasets(payload) { + var raw = payload.series || {}; + var meta = payload.series_meta || {}; + var n = (payload.commits || []).length; + var names = Object.keys(raw).sort(); + return names.map(function (name, i) { + var seriesMeta = meta[name] || {}; + var rawValues = Array.isArray(raw[name]) ? raw[name] : []; + // `data` starts null-padded; `rebuildVisibleAndUpdate` fills the + // current visible window with raw or LTTB-kept values. Chart.js's + // `spanGaps: true` means nulls render as gaps. + var data = new Array(n); + for (var j = 0; j < n; j++) data[j] = null; + return { + label: name, + data: data, + rawData: rawValues, + borderColor: colorFor(i), + backgroundColor: colorFor(i) + "20", + borderWidth: 1.5, + spanGaps: true, + tension: 0, + pointRadius: 2, + pointHoverRadius: 5, + pointHitRadius: 8, + pointStyle: "cross", + // Custom field (Chart.js ignores unknown keys). Used by the global + // filter to decide which datasets to hide/show in bulk. + benchMeta: { engine: seriesMeta.engine, format: seriesMeta.format }, + hidden: !seriesPassesFilter(seriesMeta), + }; + }); + } + + // ----------------------------------------------------------------------- + // The single source of truth for the rendered point count. + // + // Walks the visible `[rangeMin, rangeMax]` window of the raw payload and, + // for each series, renders raw when the visible count is at or below + // `MAX_VISIBLE_POINTS` and LTTB-downsamples to exactly that number when + // above. The result is written into `dataset.data` with nulls outside + // the kept set so Chart.js renders just the kept points (with + // `spanGaps: true`). + // + // Mutates `dataset.data` in place to avoid GC churn on every pan frame. + // Updates the per-card downsample badge as a side effect. 
+ // ----------------------------------------------------------------------- + function rebuildVisibleAndUpdate(card, chart, rangeMin, rangeMax) { + var canvas = chart.canvas; + var payload = canvas.__bench_payload; + if (!payload) return; + var datasets = chart.data.datasets; + var n = (payload.commits || []).length; + if (n === 0) return; + + var min = Math.max(0, Math.floor(rangeMin)); + var max = Math.min(n - 1, Math.ceil(rangeMax)); + if (max < min) max = min; + + // Build one "virtual series" for LTTB: walk every commit index in the + // visible range and, for each index, take the max non-null value + // across all datasets. This is the union of x-positions, with a + // representative y per position. Series in a Vortex chart share both + // unit and overall scale (they're the same benchmark with different + // engines/formats), so max-across-series picks visually salient peaks + // without per-series scale skew. + // + // This becomes our LTTB input: we then pick AT MOST MAX_VISIBLE_POINTS + // commit indices and every dataset renders only at those shared + // indices. Without this, per-series LTTB picked different peaks for + // each series and the union of x-positions grew with the series + // count — visually you saw way more than MAX_VISIBLE_POINTS dots + // even though each line only had MAX_VISIBLE_POINTS. + var unionIdxs = []; + var unionVals = []; + for (var i = min; i <= max; i++) { + var bestY = null; + for (var di = 0; di < datasets.length; di++) { + var rawValues = datasets[di].rawData; + if (!Array.isArray(rawValues)) continue; + var v = rawValues[i]; + if (v !== null && v !== undefined && !Number.isNaN(v) + && (bestY === null || v > bestY)) { + bestY = v; + } + } + if (bestY !== null) { + unionIdxs.push(i); + unionVals.push(bestY); + } + } + + // Decide which commit indices to render — shared across all series. + var keptSet = {}; + var anyDownsampled = false; + if (unionIdxs.length <= MAX_VISIBLE_POINTS) { + // Below the cap: render every commit that has data anywhere. + for (var u = 0; u < unionIdxs.length; u++) keptSet[unionIdxs[u]] = true; + } else { + // Above the cap: LTTB the union down to MAX_VISIBLE_POINTS exactly. + // The selected indices are then *shared* across every dataset; that + // is the cap's only correct interpretation of "max points on the + // chart at a time". + var localIndices = lttbIndices(unionIdxs, unionVals, MAX_VISIBLE_POINTS); + for (var li = 0; li < localIndices.length; li++) { + keptSet[unionIdxs[localIndices[li]]] = true; + } + anyDownsampled = true; + } + + // Plant the shared kept set into every dataset.data. Series that have + // no value at a kept index simply remain null there — `spanGaps: true` + // lets the line connect across. 
+ for (var dj = 0; dj < datasets.length; dj++) { + var ds = datasets[dj]; + var dsRaw = ds.rawData; + if (!Array.isArray(dsRaw)) continue; + var data = ds.data; + if (!Array.isArray(data) || data.length !== n) { + data = new Array(n); + ds.data = data; + } + for (var z = 0; z < n; z++) data[z] = null; + for (var idxStr in keptSet) { + var idx = +idxStr; + var val = dsRaw[idx]; + if (val !== null && val !== undefined && !Number.isNaN(val)) { + data[idx] = val; + } + } + } + + var visibleCommits = max - min + 1; + var keptCommits = 0; + for (var _u in keptSet) keptCommits++; + chart.update("none"); + syncSliderFromRange(card, visibleCommits); + syncDownsampleBadge(card, keptCommits, visibleCommits, anyDownsampled); + // If the user has zoomed out to cover everything we have inlined and the + // server might have more commits, fetch the full history in the + // background. The `__bench_full_loaded` / `__bench_full_fetch_pending` + // flags dedupe so this fires once per chart even when called every + // pan frame. + maybeRefetchFullPayload(card, min, max, n); + } + + // ----------------------------------------------------------------------- + // Lazy-upgrade an inline-trimmed payload to the full history. + // + // The landing page inlines at most `LANDING_INLINE_N` commits per chart + // (server: `html.rs::LANDING_INLINE_N`) so the cold HTML body stays small. + // The first time the user zooms wide enough to ask for everything we have + // loaded we replace the payload with the unbounded view from + // `/api/chart/{slug}?n=all`. The chart's pan/zoom limits and the toolbar + // slider's max grow to match, so subsequent zoom-out passes can scroll + // back through the older commits the inline payload didn't include. + // ----------------------------------------------------------------------- + function maybeRefetchFullPayload(card, min, max, loadedCount) { + var canvas = card.querySelector("canvas"); + if (!canvas) return; + if (!canvas.__bench_inline_trimmed) return; + if (canvas.__bench_full_loaded || canvas.__bench_full_fetch_pending) return; + // Trigger only when the visible range covers (effectively) every loaded + // commit. Anything narrower means the user hasn't asked for "more" + // yet — there's no reason to spend bandwidth on a refetch they don't + // need. + if (loadedCount <= 0) return; + var coversAll = (max - min + 1) >= loadedCount; + if (!coversAll) return; + canvas.__bench_full_fetch_pending = true; + var slug = card.getAttribute("data-chart-slug"); + if (!slug) { + canvas.__bench_full_fetch_pending = false; + return; + } + var url = "/api/chart/" + encodeURIComponent(slug) + + "?n=" + encodeURIComponent(FETCH_N); + fetch(url, { headers: { "accept": "application/json" } }) + .then(function (r) { + if (r.status === 404) return null; + if (!r.ok) throw new Error("HTTP " + r.status); + return r.json(); + }) + .then(function (full) { + if (!full) return; + replaceChartPayload(card, full); + canvas.__bench_full_loaded = true; + canvas.__bench_inline_trimmed = false; + }) + .catch(function (err) { + // Quiet — the inline payload is still rendered, the user just + // can't zoom past it. Surface to the console for debugging. 
+ if (window && window.console) { + window.console.warn("bench: full history refetch failed", err); + } + }) + .then(function () { + canvas.__bench_full_fetch_pending = false; + }); + } + + // Swap the chart's labels + datasets to a freshly fetched, unbounded + // payload while keeping the user's currently visible commit window + // anchored on the *newest* commit. The pan/zoom limits and toolbar + // slider bounds are extended to the new total commit count. + function replaceChartPayload(card, payload) { + var canvas = card.querySelector("canvas"); + var chart = canvas && canvas.__bench_chart; + if (!chart || !payload) return; + canvas.__bench_payload = payload; + var newLabels = (payload.commits || []).map(function (c) { + return shortSha(c.sha); + }); + var newDatasets = buildDatasets(payload); + // Re-apply per-card legend overrides + global filter to the new datasets, + // matching the visibility state the user had before the refetch. + var overrides = canvas.__bench_overrides || {}; + for (var i = 0; i < newDatasets.length; i++) { + var ds = newDatasets[i]; + if (overrides[ds.label]) { + // Honour any explicit legend toggle the user had made already. + var prev = chart.data.datasets.find(function (p) { + return p.label === ds.label; + }); + if (prev) ds.hidden = !!prev.hidden; + } + } + chart.data.labels = newLabels; + chart.data.datasets = newDatasets; + var newMaxIdx = Math.max(0, newLabels.length - 1); + var zoomLimits = chart.options.plugins + && chart.options.plugins.zoom + && chart.options.plugins.zoom.limits + && chart.options.plugins.zoom.limits.x; + if (zoomLimits) { + zoomLimits.max = newMaxIdx; + } + syncSliderBounds(card, newLabels.length); + // Keep the user's "scope" (number of visible commits) but anchor the + // window on the newest commit so they don't drift backwards in time + // unexpectedly. Without this anchoring, the visible range would still + // be `[0, oldN-1]` — i.e., the *oldest* `oldN` commits of the new + // payload — which is the opposite of what the user wanted when they + // zoomed out. + var sx = chart.options.scales.x; + var prevMin = Number.isFinite(sx.min) ? sx.min : 0; + var prevMax = Number.isFinite(sx.max) ? sx.max : 0; + var prevVisible = Math.max(1, prevMax - prevMin + 1); + sx.max = newMaxIdx; + sx.min = Math.max(0, newMaxIdx - (prevVisible - 1)); + rebuildVisibleAndUpdate(card, chart, sx.min, sx.max); + if (canvas.__bench_strip_render) canvas.__bench_strip_render(); + } + + // Mirror the chart's current visible commit count onto the toolbar + // slider. Called from `rebuildVisibleAndUpdate` so every path that + // changes the visible range — toolbar slider drag, drag-pan, + // drag-rectangle-zoom, wheel-pan, range-strip drag — keeps the + // slider in sync. Programmatic value writes do not fire the slider's + // `input` event, so this never re-enters `applyScope`. + function syncSliderFromRange(card, visibleCommits) { + var slider = card.querySelector('[data-role="scope-slider"]'); + if (!slider) return; + var lo = parseInt(slider.min, 10) || 1; + var hi = parseInt(slider.max, 10) || visibleCommits; + slider.value = String(Math.max(lo, Math.min(hi, visibleCommits))); + } + + // Show the badge when at least one series in the visible range was + // downsampled. The numbers are commit counts: how many distinct + // commits the chart is rendering, and how many are in the visible + // range. Both come from the slider's mental model so "300 / 3000" in + // the badge matches "showing the last 3000" on the slider. 
+ function syncDownsampleBadge(card, keptCommits, visibleCommits, anyDownsampled) { + var badge = card.querySelector('[data-role="downsample-badge"]'); + if (!badge) return; + if (!anyDownsampled || keptCommits >= visibleCommits) { + badge.setAttribute("hidden", ""); + badge.textContent = ""; + return; + } + badge.removeAttribute("hidden"); + badge.textContent = "downsampled · " + keptCommits + " / " + visibleCommits; + badge.setAttribute( + "title", + "Showing " + keptCommits + " of " + visibleCommits + + " commits in view. Each series renders at most " + + MAX_VISIBLE_POINTS + " points at a time; when more are in " + + "view, we apply LTTB (Largest Triangle, Three Buckets), an " + + "algorithm that picks representative points by maximising " + + "the area of triangles formed with neighbouring buckets. " + + "Visual peaks and valleys are preserved while the chart " + + "stays responsive. Zoom in past " + MAX_VISIBLE_POINTS + + " visible commits to see every raw measurement." + ); + } + + // ----------------------------------------------------------------------- + // Per-card construction. State lives on the canvas: + // canvas.__bench_chart — Chart.js instance + // canvas.__bench_payload — last-fetched ChartResponse (raw) + // canvas.__bench_state — { y, scope } (per-chart toolbar state) + // canvas.__bench_inline_trimmed — true if the payload came from an + // inline `
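The `lttbIndices` helper that `rebuildVisibleAndUpdate` relies on is defined elsewhere in this file and does not appear in this hunk. For review purposes, here is a minimal sketch of what such a helper conventionally looks like; the name, signature, and return shape are assumptions inferred from the call site, which passes parallel x/y arrays and maps the returned local indices back through `unionIdxs`:

    // Sketch only, not part of this patch. Classic LTTB: always keep the
    // first and last points, split the middle into (threshold - 2) buckets,
    // and from each bucket keep the point that forms the largest triangle
    // with the previously kept point and the average of the next bucket.
    function lttbIndicesSketch(xs, ys, threshold) {
      var n = xs.length;
      if (threshold >= n || threshold < 3) {
        var all = [];
        for (var i = 0; i < n; i++) all.push(i);
        return all;
      }
      var kept = [0];                          // always keep the first point
      var bucketSize = (n - 2) / (threshold - 2);
      var prev = 0;                            // index of the last kept point
      for (var b = 0; b < threshold - 2; b++) {
        // Average of the *next* bucket; the third vertex of the triangle.
        var nextStart = Math.floor((b + 1) * bucketSize) + 1;
        var nextEnd = Math.min(Math.floor((b + 2) * bucketSize) + 1, n);
        var avgX = 0, avgY = 0, count = nextEnd - nextStart;
        for (var j = nextStart; j < nextEnd; j++) { avgX += xs[j]; avgY += ys[j]; }
        avgX /= count;
        avgY /= count;
        // Within the current bucket, keep the point with the largest
        // triangle area against the previous kept point and that average.
        var start = Math.floor(b * bucketSize) + 1;
        var end = Math.min(Math.floor((b + 1) * bucketSize) + 1, n);
        var best = start;
        var bestArea = -1;
        for (var k = start; k < end; k++) {
          var area = Math.abs(
            (xs[prev] - avgX) * (ys[k] - ys[prev]) -
            (xs[prev] - xs[k]) * (avgY - ys[prev])
          ) / 2;
          if (area > bestArea) { bestArea = area; best = k; }
        }
        kept.push(best);
        prev = best;
      }
      kept.push(n - 1);                        // always keep the last point
      return kept;
    }

The property the caller depends on is that the output is a set of indices rather than resampled values: every dataset then renders its own raw value at exactly those shared commit positions, which is what keeps the per-chart point count bounded by MAX_VISIBLE_POINTS.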
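The window re-anchoring arithmetic in `replaceChartPayload` is easiest to sanity-check with concrete numbers; the commit counts below are hypothetical and purely illustrative:

    // Inline payload: 180 commits, user zoomed all the way out:
    //   prevMin = 0, prevMax = 179     =>  prevVisible = 180
    // Full fetch returns 3000 commits  =>  newMaxIdx = 2999
    //   sx.max = 2999
    //   sx.min = Math.max(0, 2999 - (180 - 1)) = 2820
    // The visible window is still 180 commits wide, but it now ends at the
    // newest commit instead of staying pinned to the oldest 180.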

unit: ns · 2 series · 3 commits

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap new file mode 100644 index 00000000000..9051d645cf1 --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: body +--- +TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1  datafusion:vortex-file-compressed  1.11x  1.80 ms
#2  duckdb:parquet  1.60x  900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap new file mode 100644 index 00000000000..a696c719c95 --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: body +--- +bench.vortex.dev
Random Access · 1 chart

Random Access Performance

#1  vortex-file-compressed  100.50 us  1.00x
#2  parquet  201.00 us  2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression · 1 chart

Compression Throughput vs Parquet

Write Speed (Compression)  2.00x
📤 Scan Speed (Decompression)  2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size · 1 chart

Compression Size Summary

⬇️ Min Size Ratio  0.50x
📊 Mean Size Ratio  0.50x
⬆️ Max Size Ratio  0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1) · 2 charts

Performance Summary

#1  datafusion:vortex-file-compressed  1.11x  1.80 ms
#2  duckdb:parquet  1.60x  900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned · 1 chart

threshold=0.75

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap b/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap new file mode 100644 index 00000000000..1a995f8a01f --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: filter_bar_section(&body) +--- +
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs new file mode 100644 index 00000000000..53efe2fc3d3 --- /dev/null +++ b/benchmarks-website/server/tests/web_ui.rs @@ -0,0 +1,1541 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Integration tests for the web-ui HTML routes. +//! +//! Builds a temp DuckDB via the same `/api/ingest` path real callers use, +//! seeds it with a multi-commit fixture so chart series have more than one +//! point, then snapshots the rendered HTML for each route plus a chart slug +//! round-trip. + +use std::io::Read as _; +use std::net::SocketAddr; + +use anyhow::Context as _; +use anyhow::Result; +use flate2::read::GzDecoder; +use serde_json::Value; +use serde_json::json; +use tempfile::TempDir; +use tokio::net::TcpListener; +use tokio::task::JoinHandle; +use vortex_bench_server::app::AppState; +use vortex_bench_server::app::router; + +const TOKEN: &str = "test-bearer-token"; + +struct Server { + addr: SocketAddr, + _tmp: TempDir, + handle: JoinHandle<()>, +} + +impl Server { + async fn start() -> Result { + let tmp = TempDir::new()?; + let db_path = tmp.path().join("bench.duckdb"); + let state = AppState::open(&db_path, TOKEN.to_string())?; + let app = router(state); + + let listener = TcpListener::bind("127.0.0.1:0").await?; + let addr = listener.local_addr()?; + let handle = tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + Ok(Self { + addr, + _tmp: tmp, + handle, + }) + } + + fn url(&self, path: &str) -> String { + format!("http://{}{}", self.addr, path) + } +} + +impl Drop for Server { + fn drop(&mut self) { + self.handle.abort(); + } +} + +/// Three synthetic commits, oldest first. Picked so the rendered output has +/// short SHAs that are visually distinct in snapshots. +fn commits() -> &'static [(&'static str, &'static str, &'static str)] { + &[ + ( + "1111111111111111111111111111111111111111", + "2026-04-23T12:00:00Z", + "first commit", + ), + ( + "2222222222222222222222222222222222222222", + "2026-04-24T12:00:00Z", + "second commit", + ), + ( + "3333333333333333333333333333333333333333", + "2026-04-25T12:00:00Z", + "third commit", + ), + ] +} + +/// Build a fixture envelope for one commit; `value_bias` is added to each +/// numeric measurement so successive commits produce a non-flat time series. 
+fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value { + json!({ + "run_meta": { + "benchmark_id": "web-ui-fixture", + "schema_version": 1, + "started_at": ts + }, + "commit": { + "sha": sha, + "timestamp": ts, + "message": msg, + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": format!("https://github.com/vortex-data/vortex/commit/{sha}") + }, + "records": [ + { + "kind": "query_measurement", + "commit_sha": sha, + "dataset": "tpch", + "scale_factor": "1", + "query_idx": 1, + "storage": "nvme", + "engine": "datafusion", + "format": "vortex-file-compressed", + "value_ns": 1_000_000 + value_bias, + "all_runtimes_ns": [1_000_000 + value_bias] + }, + { + "kind": "query_measurement", + "commit_sha": sha, + "dataset": "tpch", + "scale_factor": "1", + "query_idx": 1, + "storage": "nvme", + "engine": "duckdb", + "format": "parquet", + "value_ns": 800_000 + value_bias, + "all_runtimes_ns": [800_000 + value_bias] + }, + { + "kind": "query_measurement", + "commit_sha": sha, + "dataset": "tpch", + "scale_factor": "1", + "query_idx": 2, + "storage": "nvme", + "engine": "datafusion", + "format": "vortex-file-compressed", + "value_ns": 600_000 + value_bias, + "all_runtimes_ns": [600_000 + value_bias] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "vortex-file-compressed", + "op": "encode", + "value_ns": 9_000 + value_bias, + "all_runtimes_ns": [9_000 + value_bias] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "vortex-file-compressed", + "op": "decode", + "value_ns": 5_000 + value_bias, + "all_runtimes_ns": [5_000 + value_bias] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "parquet", + "op": "encode", + "value_ns": 18_000 + (2 * value_bias), + "all_runtimes_ns": [18_000 + (2 * value_bias)] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "parquet", + "op": "decode", + "value_ns": 10_000 + (2 * value_bias), + "all_runtimes_ns": [10_000 + (2 * value_bias)] + }, + { + "kind": "compression_size", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "vortex-file-compressed", + "value_bytes": 4_000 + value_bias + }, + { + "kind": "compression_size", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "parquet", + "value_bytes": 8_000 + (2 * value_bias) + }, + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "vortex-file-compressed", + "value_ns": 500 + value_bias, + "all_runtimes_ns": [500 + value_bias] + }, + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "parquet", + "value_ns": 1_000 + (2 * value_bias), + "all_runtimes_ns": [1_000 + (2 * value_bias)] + }, + { + "kind": "vector_search_run", + "commit_sha": sha, + "dataset": "cohere-large-10m", + "layout": "partitioned", + "flavor": "vortex-turboquant", + "threshold": 0.75, + "value_ns": 7_000 + value_bias, + "all_runtimes_ns": [7_000 + value_bias], + "matches": 42, + "rows_scanned": 1_000_000, + "bytes_scanned": 5_000_000, + "iterations": 1 + } + ] + }) +} + +async fn seed(server: &Server) -> Result<()> { + let client = reqwest::Client::new(); + for (i, (sha, ts, msg)) in commits().iter().enumerate() { + let bias = (i as i64) * 50_000; + let resp = 
client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope_for(sha, ts, msg, bias)) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "seed ingest #{i} failed: {}", + resp.status() + ); + } + Ok(()) +} + +/// Slim ingest envelope carrying just a `random_access_time` pair so we can +/// drive a long-history fixture cheaply (the full envelope is ~12 records; +/// this is two). Used by the downsample tests. +fn ra_envelope_for(sha: &str, ts: &str, msg: &str, bias: i64) -> Value { + json!({ + "run_meta": { + "benchmark_id": "downsample-fixture", + "schema_version": 1, + "started_at": ts + }, + "commit": { + "sha": sha, + "timestamp": ts, + "message": msg, + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": format!("https://github.com/vortex-data/vortex/commit/{sha}") + }, + "records": [ + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "vortex-file-compressed", + "value_ns": 500 + bias, + "all_runtimes_ns": [500 + bias] + }, + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "parquet", + "value_ns": 1_000 + (2 * bias), + "all_runtimes_ns": [1_000 + (2 * bias)] + } + ] + }) +} + +/// Seed a `Random Access` chart with `n` synthetic commits so the +/// downsampler has something to chew on. SHAs are deterministic +/// `{i:040x}`; timestamps are 1 minute apart starting 2025-01-01 so the +/// commits sort stably. +async fn seed_long_history(server: &Server, n: usize) -> Result<()> { + let client = reqwest::Client::new(); + for i in 0..n { + let sha = format!("{i:040x}"); + let minutes = i; + let ts = format!( + "2025-01-01T{:02}:{:02}:00Z", + (minutes / 60) % 24, + minutes % 60 + ); + // Sinusoidal bias so the series has interior peaks LTTB will retain. + let bias = ((i as f64).sin() * 1_000.0) as i64 + i as i64 * 10; + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&ra_envelope_for(&sha, &ts, "synthetic", bias)) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "long-history ingest #{i} failed: {}", + resp.status() + ); + } + Ok(()) +} + +/// Pull the inline `` JSON out of an +/// HTML body. Returns `None` if the script tag isn't present. +fn extract_chart_data(body: &str, idx: usize) -> Option { + let needle = format!(r#"")? + start; + // Reverse the ` insta::Settings { + let mut s = insta::Settings::clone_current(); + s.set_snapshot_path("snapshots"); + s.set_prepend_module_to_snapshot(false); + s +} + +/// Lift a single chart slug from `/api/groups`, picking from a group whose +/// name matches `predicate`. Used by tests that need a real slug to drive +/// `/chart/{slug}` and `/api/chart/{slug}` round-trips. +async fn pick_chart_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> Result { + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? + .json() + .await?; + groups["groups"] + .as_array() + .context("groups is array")? 
+ .iter() + .find(|g| g["name"].as_str().is_some_and(&predicate)) + .and_then(|g| g["charts"].as_array()) + .and_then(|c| c.first()) + .and_then(|c| c["slug"].as_str()) + .map(str::to_string) + .context("matching chart slug") +} + +async fn pick_group_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> Result { + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? + .json() + .await?; + groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .find(|g| g["name"].as_str().is_some_and(&predicate)) + .and_then(|g| g["slug"].as_str()) + .map(str::to_string) + .context("matching group slug") +} + +fn group_by_name<'a>(groups: &'a Value, name: &str) -> Result<&'a Value> { + groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .find(|g| g["name"].as_str() == Some(name)) + .with_context(|| format!("group {name:?} exists")) +} + +fn assert_close(actual: f64, expected: f64) { + let delta = (actual - expected).abs(); + assert!( + delta < 0.000_001, + "expected {actual} to be close to {expected}" + ); +} + +#[tokio::test] +async fn landing_page_snapshot() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let resp = client.get(server.url("/")).send().await?; + assert_eq!(resp.status(), 200); + let content_type = resp + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + assert!( + content_type.starts_with("text/html"), + "expected text/html, got {content_type:?}" + ); + let body = resp.text().await?; + + // Inline canvas + chart-data-0 from the first group (every group is + // collapsed by default, but the first group's payload is inlined for + // fast on-toggle hydration). + assert!( + body.contains("" + ); + assert!( + body.contains(r#"id="chart-data-0""#), + "the first group must inline its chart payload for fast on-toggle hydration" + ); + assert!( + body.contains(r#"data-chart-slug="#), + "every chart card carries data-chart-slug for the lazy-fetch path" + ); + assert!( + !body.contains(r#"id="group-search""#), + "landing page should not render the old group search bar" + ); + assert!( + body.contains(r#"class="sticky-header""#), + "landing page should render the v2-style top navbar" + ); + assert!( + body.contains(r#"data-action="expand-all""#) + && body.contains(r#"data-action="collapse-all""#), + "navbar should expose expand/collapse controls" + ); + assert!( + body.contains(r#"data-role="theme-toggle""#), + "navbar should expose a theme toggle" + ); + assert!( + body.contains(r#"class="btn-icon""#) + || body.contains(r#"class="btn-icon theme-icon theme-icon-light""#), + "navbar controls should render icons" + ); + assert!( + body.contains(r#"vortex_black_nobg.svg"#) && body.contains(r#"vortex_white_nobg.svg"#), + "navbar should render the Vortex logo assets" + ); + assert!( + body.contains("⚡") && body.contains("📤") && body.contains("⬇️") && body.contains("📊"), + "summaries should render the v2 summary icons" + ); + + insta_settings().bind(|| { + insta::assert_snapshot!("landing_page", body); + }); + Ok(()) +} + +/// All group disclosures render closed by default — the user picks which +/// to expand. The first group's chart payloads are still inlined in the +/// HTML (so opening it skips the JS fetch), but the disclosure itself +/// stays collapsed until clicked. 
+#[tokio::test] +async fn details_all_groups_closed_by_default() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let opens: Vec<_> = body + .match_indices(r#"
').map(|p| i + p).unwrap_or(i); + body[i..=tag_end].contains(" open") + }) + .collect(); + assert!(!opens.is_empty(), "landing page must render
"); + for (i, is_open) in opens.iter().enumerate() { + assert!(!is_open, "group #{i} must be closed by default"); + } + // The first group's chart payload should still be inlined — fast + // hydration on toggle without a network round-trip. + assert!( + body.contains(r#"id="chart-data-0""#), + "first group's chart payload should be inlined for fast on-toggle hydration", + ); + Ok(()) +} + +#[tokio::test] +async fn collapsed_groups_still_show_summaries() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let mut found_visible_summary = false; + for (group_start, _) in body.match_indices(r#"
') + .map(|p| details_start + p) + .context("details tag closes")?; + let is_open = body[details_start..=details_tag_end].contains(" open"); + if is_open { + continue; + } + + let summary_end = body[details_start..] + .find("
") + .map(|p| details_start + p) + .context("disclosure closes")?; + let chart_grid_start = body[summary_end..] + .find(r#"
"#) + .map(|p| summary_end + p) + .context("details contains chart grid")?; + let visible_region = &body[summary_end..chart_grid_start]; + if visible_region.contains(r#"class="benchmark-scores-summary""#) { + found_visible_summary = true; + break; + } + } + + assert!( + found_visible_summary, + "at least one closed group should render its score summary before the hidden chart grid" + ); + Ok(()) +} + +/// Every `.chart-card` carries a compact `.toolbar.toolbar--card` so the user +/// has per-chart controls. There is no page-level toolbar, no preset scope +/// button row, and no abs/rel mode toggle. +#[tokio::test] +async fn chart_card_carries_per_chart_toolbar() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let card_count = body.matches(r#"
0, "landing page must render chart cards"); + assert_eq!( + toolbar_count, card_count, + "every chart-card must contain a toolbar--card ({card_count} cards / {toolbar_count} toolbars)" + ); + assert_eq!( + strip_count, card_count, + "every chart-card must carry a range-strip below the canvas \ + ({card_count} cards / {strip_count} strips)" + ); + assert!( + body.contains(r#"data-role="range-window""#) + && body.contains(r#"data-role="range-handle-left""#) + && body.contains(r#"data-role="range-handle-right""#), + "range-strip must include a draggable window and two resize handles" + ); + assert!( + !body.contains(r#"data-mode="#), + "abs/rel mode buttons should not render" + ); + assert!( + !body.contains(r#"data-scope="#), + "preset scope buttons should not render; use the slider instead" + ); + assert!( + body.contains(r#"data-role="scope-slider""#), + "scope slider should remain available" + ); + assert!( + !body.contains(r#"scope-slider-label"#), + "scope value labels should not add repeated numbers to every card" + ); + + // Same invariant on /chart/{slug}. + let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + let body = client + .get(server.url(&format!("/chart/{slug}"))) + .send() + .await? + .text() + .await?; + assert!( + body.contains(r#"class="toolbar toolbar--card""#), + "chart page must carry a per-chart toolbar" + ); + assert!(!body.contains(r#"data-mode="#)); + assert!(!body.contains(r#"data-scope="#)); + assert!(body.contains(r#"data-role="scope-slider""#)); + assert!(!body.contains(r#"scope-slider-label"#)); + + // Same invariant on /group/{slug}. + let group_slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + let body = client + .get(server.url(&format!("/group/{group_slug}"))) + .send() + .await? + .text() + .await?; + assert!( + body.contains(r#"class="toolbar toolbar--card""#), + "group page must carry per-chart toolbars" + ); + assert!(!body.contains(r#"data-mode="#)); + assert!(!body.contains(r#"data-scope="#)); + assert!(body.contains(r#"data-role="scope-slider""#)); + assert!(!body.contains(r#"scope-slider-label"#)); + Ok(()) +} + +/// Landing-page `
` summaries appear in the canonical v2 order: the +/// fixture seeds Random Access, Compression, Compression Size, TPC-H, and a +/// vector-search group. The first three are in [`api::GROUP_ORDER`] in the +/// expected positions; TPC-H follows; the unknown vector-search group sorts +/// last (alphabetical fallback after the listed names). +#[tokio::test] +async fn landing_groups_render_in_v2_order() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // Extract group names in render order from the `data-group-name=` attrs. + let mut names = Vec::new(); + for window in body.split("data-group-name=\"").skip(1) { + if let Some(end) = window.find('"') { + names.push(window[..end].to_string()); + } + } + let expected = [ + "Random Access", + "Compression", + "Compression Size", + "TPC-H (NVMe) (SF=1)", + "cohere-large-10m / partitioned", + ]; + assert_eq!(names, expected, "v2 ordering"); + Ok(()) +} + +#[tokio::test] +async fn chart_page_snapshot() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + // The query_measurements chart has two series so the snapshot + // exercises multi-series rendering. + let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + + let resp = client + .get(server.url(&format!("/chart/{slug}"))) + .send() + .await?; + assert_eq!(resp.status(), 200); + let body = resp.text().await?; + assert!( + body.contains(r#"