From b2a7093577228c5186f686b963151b0658bfcf12 Mon Sep 17 00:00:00 2001
From: Justin Kim <justin.joy.9to5@gmail.com>
Date: Thu, 30 Apr 2026 15:12:23 +0900
Subject: [PATCH 1/3] feat: ksuid_explicit_bzero shim with platform-specific
 DSE-resistant wipe

Closes #2 (commit 1 of 2 in the series).

Adds a private DSE-resistant zeroizer in libksuid/wipe.h. Plain
memset(p, 0, n) on a buffer the compiler proves is never read again
is allowed -- and at -O2+ encouraged -- to be elided entirely. For
sensitive material (CSPRNG seed bytes, ChaCha20 internal state,
freshly-drawn key material) that is exactly the wrong outcome.

ksuid_explicit_bzero is a header-only static inline that resolves
at compile time to the strongest DSE-immune primitive the target
offers, in this order:

  1. explicit_bzero  (glibc 2.25+, MUSL, *BSD, macOS 14.4+)
     -- documented to resist optimisation. Two meson probes pick
        between <string.h> (modern glibc, macOS, OpenBSD) and
        <strings.h> (FreeBSD, NetBSD, older glibc/MUSL).
  2. SecureZeroMemory  (Windows / Cygwin via <windows.h>)
     -- MSDN guarantees the writes are not optimised away.
  3. memset_s  (C11 Annex K, rare)
  4. Indirect-call-through-volatile fallback: a static volatile
     function pointer to memset, called via the pointer, followed
     by a memory-clobber asm barrier on GCC/Clang.

The build-time KSUID_FORCE_VOLATILE_FALLBACK macro bypasses every
primitive branch and forces the fallback path. This exists so CI
can exercise the fallback even on hosts that have explicit_bzero /
SecureZeroMemory available -- without it the fallback would ship
unverified on every supported matrix lane. Production builds never
set this flag.

The Critic risk register flagged seven concerns the implementation
addresses up front:

  R1 dual-header probe: try <string.h> first, then <strings.h>.
     Glibc 2.43 on Arch only declares the prototype in <string.h>;
     FreeBSD only in <strings.h>; macOS varies by SDK. Single-
     header probes silently miss the platform's primary location.
  R2 _DEFAULT_SOURCE: meson cc.has_function does NOT inherit
     add_project_arguments(), so the probe must pass
     -D_DEFAULT_SOURCE explicitly. Without it glibc hides the
     prototype and the probe lies "no explicit_bzero".
  R4 fallback DSE-resistance: a naive `volatile uint8_t *vp = p;
     for (...) vp[i] = 0` can still be elided by some compilers
     because the volatile qualifier on the pointee, without read
     observation, is not always honoured. The shim uses the
     stronger pattern -- volatile-qualified function-pointer +
     trailing __asm__ __volatile__ memory clobber.
  R4-coverage KSUID_FORCE_VOLATILE_FALLBACK: makes the fallback
     path exercisable even when the host has a primitive available,
     so the auto matrix is not the only thing testing the shim.
  R6 null guard + bounded for-loop: `if (!p || !n) return;` with
     for-loop counter avoids size_t underflow that
     -fsanitize=unsigned-integer-overflow would flag.
  R10 meson summary(): emits the selected backend on configure
     ("wipe backend: explicit_bzero (<string.h>)" on Linux glibc,
     etc.), so CI logs make backend selection auditable across
     the matrix without having to re-run feature probes.

Surface added:

  libksuid/wipe.h            new private header (NOT installed),
                              now gated on
                              KSUID_FORCE_VOLATILE_FALLBACK so the
                              fallback path is reachable on demand.
  meson.build                two cc.has_function probes
                              + KSUID_HAVE_EXPLICIT_BZERO_*
                              defines + summary() output.
  tests/test_wipe.c          smoke test: wipe a buffer, assert
                              every byte is zero. Proves the
                              shim *zeroes* (NOT that it resists
                              DSE -- objdump grep in commit 2
                              proves that). Covers full buffer,
                              subrange, zero-length, and NULL.
  tests/meson.build          test_wipe registered in base_tests.

Verified locally on Linux glibc 2.43 (auto build): meson summary
reports "wipe backend: explicit_bzero (<string.h>)"; 13/13 tests
pass; clang-tidy 22 still reports zero findings. Verified on
KSUID_FORCE_VOLATILE_FALLBACK build: same backend probe, but the
shim resolves to the volatile fn-ptr + asm clobber path; 13/13
tests still pass; objdump confirms zero explicit_bzero@plt calls
in the resulting library.

Commit 2 wires the shim into the four existing memset(0) sites in
rand_tls.c + chacha20.c that hold sensitive data, plus the CI
gates that prove the wipes survive optimisation in both build
modes.

Out of scope: TLS-state wipe at thread exit. That is issue #4.
---
 libksuid/wipe.h   | 102 ++++++++++++++++++++++++++++++++++++++++++++++
 meson.build       |  35 ++++++++++++++++
 tests/meson.build |   3 +-
 tests/test_wipe.c |  61 +++++++++++++++++++++++++++
 4 files changed, 200 insertions(+), 1 deletion(-)
 create mode 100644 libksuid/wipe.h
 create mode 100644 tests/test_wipe.c

diff --git a/libksuid/wipe.h b/libksuid/wipe.h
new file mode 100644
index 0000000..9f444a5
--- /dev/null
+++ b/libksuid/wipe.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * DSE-resistant zeroizer. Plain memset(p, 0, n) on a buffer that the
+ * compiler proves is "dead" (never read again) is allowed -- and at
+ * -O2 and beyond, encouraged -- to be elided entirely. For sensitive
+ * material (CSPRNG seed, ChaCha20 internal state, freshly-drawn key
+ * bytes) that is exactly the wrong outcome.
+ *
+ * ksuid_explicit_bzero picks the strongest DSE-immune primitive the
+ * compile target offers, in this order:
+ *
+ *   1. explicit_bzero  (glibc 2.25+, MUSL, *BSD, macOS 14.4+)
+ *      Documented to resist optimisation; canonical answer.
+ *      Header lives in <strings.h> on Linux/FreeBSD/NetBSD;
+ *      <string.h> on OpenBSD/macOS. Two meson probes pick.
+ *
+ *   2. SecureZeroMemory  (Windows, <windows.h>)
+ *      MSDN explicitly guarantees the writes are not optimised
+ *      away. Macro over RtlSecureZeroMemory.
+ *
+ *   3. memset_s  (C11 Annex K)
+ *      Rare; gated behind __STDC_LIB_EXT1__. Required to be
+ *      DSE-immune by the standard.
+ *
+ *   4. Portable fallback: indirect call through a `volatile`
+ *      function pointer to memset, followed by a memory-clobber
+ *      barrier. The volatile qualifier on the pointer forces the
+ *      compiler to actually re-read it and emit the call; the
+ *      barrier prevents post-call dead-code analysis from proving
+ *      the writes are unobserved.
+ *
+ * The fallback is correct on every C11 toolchain we target but
+ * empirically a few rungs slower than explicit_bzero / Secure-
+ * ZeroMemory because it goes through an indirect call. CI on the
+ * Linux GCC lane runs an objdump grep that fails the build if the
+ * compiler elided the wipe.
+ *
+ * Issue #2 scope: short-lived locals (44-byte seed buffer in
+ * rand_tls.c, 64-byte keystream chunks, 16-word ChaCha state).
+ * Long-lived TLS state at thread-exit is issue #4.
+ */
+#ifndef KSUID_WIPE_H
+#define KSUID_WIPE_H
+
+#include <stddef.h>
+#include <string.h>
+
+/* The KSUID_FORCE_VOLATILE_FALLBACK build flag bypasses every
+ * platform-specific primitive and forces the indirect-call-through-
+ * volatile path. It exists so CI can exercise the fallback even on
+ * a host that has explicit_bzero / SecureZeroMemory available --
+ * without it the fallback ships unverified on every supported
+ * matrix lane. Production builds never set this. */
+#if !defined(KSUID_FORCE_VOLATILE_FALLBACK)
+#  if defined(KSUID_HAVE_EXPLICIT_BZERO_STRINGS_H)
+#    include <strings.h>
+#  elif defined(KSUID_HAVE_EXPLICIT_BZERO_STRING_H)
+/* explicit_bzero already pulled in by <string.h> above. */
+#  elif defined(_WIN32) || defined(__CYGWIN__)
+#    define WIN32_LEAN_AND_MEAN
+#    include <windows.h>
+#  elif defined(KSUID_HAVE_MEMSET_S)
+#    define __STDC_WANT_LIB_EXT1__ 1
+#    include <string.h>
+#  endif
+#endif
+
+static inline void
+ksuid_explicit_bzero (void *p, size_t n)
+{
+  if (p == NULL || n == 0)
+    return;
+
+#if !defined(KSUID_FORCE_VOLATILE_FALLBACK) && \
+    (defined(KSUID_HAVE_EXPLICIT_BZERO_STRINGS_H) \
+     || defined(KSUID_HAVE_EXPLICIT_BZERO_STRING_H))
+  explicit_bzero (p, n);
+#elif !defined(KSUID_FORCE_VOLATILE_FALLBACK) && \
+    (defined(_WIN32) || defined(__CYGWIN__))
+  SecureZeroMemory (p, n);
+#elif !defined(KSUID_FORCE_VOLATILE_FALLBACK) \
+    && defined(KSUID_HAVE_MEMSET_S)
+  memset_s (p, n, 0, n);
+#else
+  /* Indirect-call-through-volatile fallback. The function pointer
+   * is volatile-qualified, so the compiler must re-read it and
+   * emit a real call -- it cannot inline memset and elide the
+   * stores via DSE. The trailing memory clobber on GCC/Clang
+   * blocks any post-call dead-code reasoning.
+   *
+   * On MSVC <intrin.h>'s _ReadWriteBarrier serves the same role,
+   * but MSVC builds use the SecureZeroMemory branch above so this
+   * fallback is GCC/Clang in practice. */
+  static void *(*const volatile ksuid_memset_v) (void *, int, size_t) = memset;
+  ksuid_memset_v (p, 0, n);
+#  if defined(__GNUC__) || defined(__clang__)
+  __asm__ __volatile__ (""::"r" (p):"memory");
+#  endif
+#endif
+}
+
+#endif /* KSUID_WIPE_H */
diff --git a/meson.build b/meson.build
index 9d94aaf..77b840e 100644
--- a/meson.build
+++ b/meson.build
@@ -44,6 +44,41 @@ if cc.has_function('getentropy', prefix : '#include <unistd.h>')
   common_args += '-DKSUID_HAVE_GETENTROPY=1'
 endif
 
+# DSE-resistant wipe primitive (issue #2). The explicit_bzero
+# prototype lives in different headers on different platforms:
+# modern glibc (2.25+), macOS 14.4+, and OpenBSD declare it in
+# <string.h>; FreeBSD, NetBSD, and some older glibc / MUSL builds
+# put it in <strings.h>. Probe <string.h> first (the common case)
+# and fall back to <strings.h>.
+#
+# Pass _DEFAULT_SOURCE explicitly because meson does not inherit
+# add_project_arguments() into has_function() probes, and glibc
+# gates the prototype behind that feature macro.
+#
+# memset_s is the C11 Annex K third option; rarely shipped.
+# Windows uses SecureZeroMemory unconditionally (the macro lives
+# in <windows.h> and cc.has_function lies on macros, so detect by
+# host system instead).
+wipe_backend = 'volatile-fallback'
+if cc.has_function('explicit_bzero',
+    prefix : '#include <string.h>',
+    args   : ['-D_DEFAULT_SOURCE'])
+  common_args += '-DKSUID_HAVE_EXPLICIT_BZERO_STRING_H=1'
+  wipe_backend = 'explicit_bzero (<string.h>)'
+elif cc.has_function('explicit_bzero',
+    prefix : '#include <strings.h>',
+    args   : ['-D_DEFAULT_SOURCE'])
+  common_args += '-DKSUID_HAVE_EXPLICIT_BZERO_STRINGS_H=1'
+  wipe_backend = 'explicit_bzero (<strings.h>)'
+elif host_machine.system() == 'windows'
+  wipe_backend = 'SecureZeroMemory'
+elif cc.has_function('memset_s',
+    prefix : '#define __STDC_WANT_LIB_EXT1__ 1\n#include <string.h>')
+  common_args += '-DKSUID_HAVE_MEMSET_S=1'
+  wipe_backend = 'memset_s (C11 Annex K)'
+endif
+summary({'wipe backend' : wipe_backend}, section : 'libksuid')
+
 # Windows: the supported entropy source is BCryptGenRandom. The whole
 # /dev/urandom + getrandom + getentropy chain doesn't apply, and
 # rand_os.c switches to a Bcrypt-only path when KSUID_HAVE_BCRYPT is
diff --git a/tests/meson.build b/tests/meson.build
index b294d70..1e02334 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -11,7 +11,8 @@ threads_dep = dependency('threads', required : false)
 
 base_tests = ['test_smoke', 'test_parts', 'test_base62', 'test_parse_format',
               'test_sequence', 'test_rand_os',
-              'test_chacha20', 'test_new', 'test_simd_parity']
+              'test_chacha20', 'test_new', 'test_simd_parity',
+              'test_wipe']
 
 foreach t : base_tests
   exe = executable(t, t + '.c',
diff --git a/tests/test_wipe.c b/tests/test_wipe.c
new file mode 100644
index 0000000..0760369
--- /dev/null
+++ b/tests/test_wipe.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * Smoke test for libksuid/wipe.h. Proves the shim *zeroes* a buffer.
+ * Does NOT prove the shim resists dead-store elimination -- that
+ * property is verified by the objdump grep step in CI's Linux GCC
+ * lane, which fails the build if the wipe call disappeared from the
+ * library's optimised disassembly.
+ */
+#include <libksuid/wipe.h>
+#include "test_util.h"
+
+static void
+test_wipe_zeroes_a_full_buffer (void)
+{
+  uint8_t buf[64];
+  for (size_t i = 0; i < sizeof buf; ++i)
+    buf[i] = (uint8_t) (i ^ 0xa5);
+  ksuid_explicit_bzero (buf, sizeof buf);
+  for (size_t i = 0; i < sizeof buf; ++i)
+    ASSERT_EQ_INT (buf[i], 0);
+}
+
+static void
+test_wipe_zeroes_a_subrange (void)
+{
+  uint8_t buf[16];
+  memset (buf, 0xff, sizeof buf);
+  ksuid_explicit_bzero (buf + 4, 8);
+  ASSERT_EQ_INT (buf[3], 0xff);
+  for (size_t i = 4; i < 12; ++i)
+    ASSERT_EQ_INT (buf[i], 0);
+  ASSERT_EQ_INT (buf[12], 0xff);
+}
+
+static void
+test_wipe_handles_zero_length (void)
+{
+  uint8_t buf[4] = { 1, 2, 3, 4 };
+  ksuid_explicit_bzero (buf, 0);
+  ASSERT_EQ_INT (buf[0], 1);
+  ASSERT_EQ_INT (buf[3], 4);
+}
+
+static void
+test_wipe_handles_null_pointer (void)
+{
+  /* Documented contract: the shim is a no-op on (NULL, n) and on
+   * (p, 0). Neither must crash. */
+  ksuid_explicit_bzero (NULL, 0);
+  ksuid_explicit_bzero (NULL, 64);
+}
+
+int
+main (void)
+{
+  RUN_TEST (test_wipe_zeroes_a_full_buffer);
+  RUN_TEST (test_wipe_zeroes_a_subrange);
+  RUN_TEST (test_wipe_handles_zero_length);
+  RUN_TEST (test_wipe_handles_null_pointer);
+  TEST_MAIN_END ();
+}

From 2d88183093c985807b1096319128a1025a099382 Mon Sep 17 00:00:00 2001
From: Justin Kim <justin.joy.9to5@gmail.com>
Date: Thu, 30 Apr 2026 15:12:24 +0900
Subject: [PATCH 2/3] core: wipe sensitive buffers via ksuid_explicit_bzero
 (defeat DSE)

Closes #2 (commit 2 of 2 in the series).

Replaces four DSE-vulnerable plain-memset(0) sites with the new
ksuid_explicit_bzero shim that landed in commit 1. Adds two CI
gates that together prove the wipes (a) survive optimisation in
the default build and (b) work on the portable fallback path.

Sites converted to ksuid_explicit_bzero:

  libksuid/rand_tls.c:86   partial-seed wipe on RNG failure
  libksuid/rand_tls.c:109  kn[44] wipe after key/nonce copied to
                            TLS state
  libksuid/rand_tls.c:168  consumed-keystream wipe in
                            ksuid_random_bytes inner loop
  libksuid/chacha20.c:66   ksuid_chacha20_block local x[16] -- the
                            post-permutation state, which is
                            keystream-mixed and a leak vector for
                            stack-read primitives in sibling frames.

The fourth wipe (chacha20.c x[16]) was a Critic-flagged scope
addition: the issue body said "any temporary state that holds key
material" and the round-mixed x[] qualifies. Cost is one 64-byte
wipe per ChaCha block, dominated by the 20-round permutation it
follows; benchmarked overhead is in the noise.

The seed-time `r->buf` zero-fill (rand_tls.c:117) deliberately
stays as plain memset -- it is initialisation before the first
keystream block overwrites the buffer, not secret-erasure, so
DSE is not a hazard.

CI gates added (.github/workflows/ci-pr.yml):

  Phase 2a (auto build, Ubuntu GCC):
    Runs `objdump -d libksuid.so.<ver> | grep -E
    'call .*<(explicit_bzero|ksuid_explicit_bzero)'` and fails
    the build if fewer than four surviving call sites are found.
    The floor of 4 matches the source-level call count; observed
    locally on glibc 2.43 / GCC 15.2.1 is 6 surviving calls (the
    static-inline shim is partially inlined and partially kept
    out-of-line). The path matcher uses `find -type f` so the
    libksuid.so.<ver>.p object-archive directory cannot leak
    into the disasm input. Critic R5 mitigation.

  Phase 2b (KSUID_FORCE_VOLATILE_FALLBACK build):
    A NEW dedicated job that builds with
    -DKSUID_FORCE_VOLATILE_FALLBACK=1 to bypass every platform
    primitive and exercise the volatile-fn-ptr fallback path.
    test_wipe runs against this build to prove the fallback
    still zeroes correctly. A secondary objdump grep asserts
    the fallback library has zero `call <explicit_bzero@plt>`
    references, catching a regression where a future
    contributor adds a primitive without gating on the force
    macro. Critic R4-coverage mitigation -- without this, every
    matrix lane would silently select explicit_bzero and the
    fallback would ship untested.

Out-of-scope, deliberately not addressed:

  - TLS-state lifetime: the per-thread ksuid_tls_rng_t survives
    until the OS reclaims the TLS block. Wiping at thread exit is
    issue #4. A `TODO(#4)` banner in rand_tls.c documents the
    boundary so a reader does not assume this PR closed it.

13/13 tests pass on both auto and force-fallback builds; clang-
tidy 22 still reports zero findings; gst-indent leaves the
working tree untouched; meson reports `wipe backend: explicit_bzero
(<string.h>)` on Linux glibc 2.43.
---
 .github/workflows/ci-pr.yml | 89 ++++++++++++++++++++++++++++++++++++-
 libksuid/chacha20.c         |  9 ++++
 libksuid/rand_tls.c         | 24 +++++++---
 3 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci-pr.yml b/.github/workflows/ci-pr.yml
index 5cf5255..b7d4524 100644
--- a/.github/workflows/ci-pr.yml
+++ b/.github/workflows/ci-pr.yml
@@ -105,6 +105,39 @@ jobs:
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
           meson test -C builddir --print-errorlogs
 
+      - name: Verify ksuid_explicit_bzero survives optimization
+        if: runner.os == 'Linux' && matrix.compiler == 'gcc'
+        run: |
+          # Issue #2: the DSE-resistant wipe in libksuid/wipe.h must
+          # actually emit a call (or equivalent indirect call) at
+          # every site in rand_tls.c and chacha20.c. A plain memset
+          # would be elided by -O2; explicit_bzero / SecureZeroMemory /
+          # the volatile-fn-ptr fallback must NOT be. This step proves
+          # the shim's calls survive into the optimised library by
+          # grepping the disassembly for either form: a direct call
+          # to explicit_bzero@plt (where the static-inline shim got
+          # inlined into its caller) or a call to the out-of-line
+          # ksuid_explicit_bzero symbol (where the compiler kept
+          # the shim as a function). At least four wipe sites exist
+          # in source: three in rand_tls.c plus one in chacha20.c, so
+          # counting >=4 surviving calls is the correctness floor.
+          # Path uses find -type f to skip symlinks AND the
+          # libksuid.so.<ver>.p object-archive directory that meson
+          # leaves alongside the real .so.
+          set -eux
+          shared=$(find builddir -maxdepth 1 -type f -name 'libksuid.so.*' \
+                   | grep -E 'libksuid\.so\.[0-9]+\.[0-9]+\.[0-9]+$' \
+                   | head -1)
+          test -n "$shared" || { echo "::error::no versioned libksuid.so found in builddir" >&2; exit 1; }
+          n=$(objdump -d "$shared" \
+              | grep -cE 'call .*<(explicit_bzero|ksuid_explicit_bzero)' \
+              || true)
+          echo "wipe call sites in optimised .so: $n"
+          if [ "$n" -lt 4 ]; then
+            echo "::error::Expected at least 4 surviving wipe calls in libksuid.so.*; found $n. DSE may have eaten the wipes." >&2
+            exit 1
+          fi
+
       - name: Verify install lays down license artifacts
         if: runner.os == 'Linux'
         run: |
@@ -124,7 +157,61 @@ jobs:
           find staging -name 'libksuid.so' -print | grep -q .
 
   # ==========================================================================
-  # Phase 2b: meson dist round-trip
+  # Phase 2b: Wipe-shim fallback path coverage (issue #2)
+  # The default build on every supported matrix lane resolves
+  # ksuid_explicit_bzero to a platform primitive (explicit_bzero,
+  # SecureZeroMemory, or memset_s). The portable volatile-fn-ptr
+  # fallback in libksuid/wipe.h therefore ships *unexercised* unless
+  # CI explicitly forces it. This job builds with
+  # KSUID_FORCE_VOLATILE_FALLBACK=1, which disables every primitive
+  # branch and exercises the fallback. test_wipe asserts the
+  # fallback still zeroes the buffers it is given (it does not
+  # assert DSE-resistance -- the auto-build disasm gate above is
+  # the DSE-resistance witness; this job witnesses fallback
+  # correctness on a host that would otherwise never run the path).
+  # ==========================================================================
+  wipe-fallback:
+    name: Wipe shim fallback path coverage
+    needs: [lint]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y meson ninja-build
+
+      - name: Configure with KSUID_FORCE_VOLATILE_FALLBACK
+        run: |
+          meson setup builddir-fb \
+            -Dc_args=-DKSUID_FORCE_VOLATILE_FALLBACK=1
+
+      - name: Build
+        run: meson compile -C builddir-fb
+
+      - name: Run test_wipe under fallback shim
+        run: meson test -C builddir-fb test_wipe --print-errorlogs
+
+      - name: Confirm fallback path actually skipped explicit_bzero
+        run: |
+          # On the fallback path the shim must NOT emit a call to
+          # explicit_bzero@plt anywhere in the optimised library;
+          # grepping for any such reference catches a regression
+          # where a future contributor adds a primitive without
+          # gating on KSUID_FORCE_VOLATILE_FALLBACK.
+          set -eux
+          shared=$(find builddir-fb -maxdepth 1 -type f \
+                   -name 'libksuid.so.*' \
+                   | grep -E 'libksuid\.so\.[0-9]+\.[0-9]+\.[0-9]+$' \
+                   | head -1)
+          if objdump -d "$shared" | grep -qE 'call .*<explicit_bzero@plt>'; then
+            echo "::error::Fallback build leaked an explicit_bzero@plt call; KSUID_FORCE_VOLATILE_FALLBACK is not gating the primitive." >&2
+            exit 1
+          fi
+
+  # ==========================================================================
+  # Phase 2c: meson dist round-trip
   # Builds a source tarball, extracts it into a clean tree, and rebuilds
   # the library + tests from the tarball. Catches reproducibility
   # regressions in configure_file templates (R2 from issue #3): if a
diff --git a/libksuid/chacha20.c b/libksuid/chacha20.c
index c40518e..0593638 100644
--- a/libksuid/chacha20.c
+++ b/libksuid/chacha20.c
@@ -12,6 +12,8 @@
 
 #include <string.h>
 
+#include <libksuid/wipe.h>
+
 #define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
 
 #define QR(a, b, c, d) do {                     \
@@ -55,4 +57,11 @@ ksuid_chacha20_block (uint8_t out[64], uint32_t state[16])
   state[12]++;
   if (state[12] == 0)
     state[13]++;
+
+  /* Wipe the local x[16] before returning. After the round-mixing it
+   * holds the keystream-mixed words, which a sibling stack frame
+   * with a stack-read primitive could lift. Defense in depth: cost
+   * is ~64 bytes wiped per 64-byte block, dominated by the chacha
+   * computation itself. */
+  ksuid_explicit_bzero (x, sizeof x);
 }
diff --git a/libksuid/rand_tls.c b/libksuid/rand_tls.c
index 1320e09..d117591 100644
--- a/libksuid/rand_tls.c
+++ b/libksuid/rand_tls.c
@@ -38,6 +38,14 @@
 #endif
 
 #include <libksuid/chacha20.h>
+#include <libksuid/wipe.h>
+
+/* TODO(#4): the per-thread ksuid_tls_rng_t below lives until the OS
+ * reclaims its TLS block, which means a thread that exits leaves the
+ * 64-byte ChaCha state and the 64-byte keystream buffer in process
+ * memory until then. Issue #4 covers wiping that state on thread
+ * exit. This file's wipes are bounded to the short-lived locals --
+ * the 44-byte seed buffer and the in-flight keystream chunks. */
 
 #define KSUID_RNG_RESEED_BYTES   (1u << 20)     /* 1 MiB                   */
 #define KSUID_RNG_RESEED_SECONDS 3600   /* 1 hour                  */
@@ -75,7 +83,7 @@ ksuid_tls_rng_seed (ksuid_tls_rng_t *r)
   uint8_t kn[44];               /* 32 key + 12 nonce */
   if (ksuid_os_random_bytes (kn, sizeof kn) < 0) {
     /* Wipe partial seed bytes before returning. */
-    memset (kn, 0, sizeof kn);
+    ksuid_explicit_bzero (kn, sizeof kn);
     return -1;
   }
   r->state[0] = KSUID_CHACHA20_C0;
@@ -96,9 +104,13 @@ ksuid_tls_rng_seed (ksuid_tls_rng_t *r)
         | ((uint32_t) kn[32 + i * 4 + 3] << 24);
   }
   /* Wipe seed material from local. The chacha state itself stays in
-   * TLS, but at least we limit the residue. */
-  memset (kn, 0, sizeof kn);
+   * TLS, but at least we limit the residue from leaving on the
+   * stack frame after this function returns. */
+  ksuid_explicit_bzero (kn, sizeof kn);
 
+  /* r->buf is about to be overwritten by the first chacha block; the
+   * memset is initialisation, not secret-erasure, so plain memset is
+   * fine here. */
   memset (r->buf, 0, sizeof r->buf);
   r->buf_pos = sizeof r->buf;   /* empty buffer, force first block */
   r->bytes_emitted = 0;
@@ -150,8 +162,10 @@ ksuid_random_bytes (uint8_t *buf, size_t n)
     size_t chunk = (n < avail) ? n : avail;
     memcpy (buf, r->buf + r->buf_pos, chunk);
     /* Wipe consumed keystream to limit forward exposure if memory is
-     * later inspected. */
-    memset (r->buf + r->buf_pos, 0, chunk);
+     * later inspected. ksuid_explicit_bzero blocks DSE here -- a
+     * plain memset would be elided by -O2 because the wiped bytes
+     * are not subsequently read. */
+    ksuid_explicit_bzero (r->buf + r->buf_pos, chunk);
     r->buf_pos += chunk;
     buf += chunk;
     n -= chunk;

From d86810f202e4f50473a5551353e8625488f70ee1 Mon Sep 17 00:00:00 2001
From: Justin Kim <justin.joy.9to5@gmail.com>
Date: Thu, 30 Apr 2026 15:20:16 +0900
Subject: [PATCH 3/3] fix(ci): hoist __STDC_WANT_LIB_EXT1__ to project args for
 memset_s path

macOS Clang on the macos-latest runner failed to compile wipe.h with

  ../libksuid/wipe.h:83:3: error: call to undeclared function
  'memset_s'; ISO C99 and later do not support implicit function
  declarations [-Wimplicit-function-declaration]

The other matrix lanes pass: Ubuntu GCC + Clang select
explicit_bzero (<string.h>); Windows MSVC selects SecureZeroMemory;
the wipe-fallback job forces the volatile path. Only macOS falls
through every explicit_bzero probe (the macos-latest SDK doesn't
expose the prototype the way the probe is shaped) and lands on the
memset_s arm.

Root cause: the memset_s prototype in <string.h> on every libc that
provides it (glibc, Apple libc, MUSL, ...) is gated behind
__STDC_WANT_LIB_EXT1__. wipe.h tried to opt in by defining the
macro right before its conditional <string.h> include, but the
header had already pulled <string.h> in unconditionally at the top
-- and the include guard prevented the second include from
re-emitting the prototype. The opt-in came too late.

Fix: when the meson probe selects the memset_s arm, also push
-D__STDC_WANT_LIB_EXT1__=1 into common_args so every translation
unit sees the macro set BEFORE its first <string.h> include,
regardless of include order. The redundant `#define` inside wipe.h
becomes a no-op and is replaced with a comment that explains why
the project-wide define is the only correct fix.

Linux glibc 2.43 is unaffected because the project-arg macro is
harmless when set on a libc that already exposes memset_s
unconditionally (or doesn't ship it at all). The primitive that
gets selected on Linux GCC is still explicit_bzero (<string.h>)
per the meson summary line.

Verified locally on Linux GCC: 13/13 tests pass on both auto and
KSUID_FORCE_VOLATILE_FALLBACK builds; meson summary unchanged. The
fix is small enough that it is being landed on the same PR as the
shim itself rather than as a separate follow-up.
---
 libksuid/wipe.h | 6 ++++--
 meson.build     | 7 +++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/libksuid/wipe.h b/libksuid/wipe.h
index 9f444a5..f20f16d 100644
--- a/libksuid/wipe.h
+++ b/libksuid/wipe.h
@@ -60,8 +60,10 @@
 #    define WIN32_LEAN_AND_MEAN
 #    include <windows.h>
 #  elif defined(KSUID_HAVE_MEMSET_S)
-#    define __STDC_WANT_LIB_EXT1__ 1
-#    include <string.h>
+/* __STDC_WANT_LIB_EXT1__ is set project-wide by meson when this
+ * branch is selected -- defining it here would be too late, the
+ * unconditional <string.h> include at the top of this header has
+ * already burned the prototype set without it. */
 #  endif
 #endif
 
diff --git a/meson.build b/meson.build
index 77b840e..c2eafdc 100644
--- a/meson.build
+++ b/meson.build
@@ -75,6 +75,13 @@ elif host_machine.system() == 'windows'
 elif cc.has_function('memset_s',
     prefix : '#define __STDC_WANT_LIB_EXT1__ 1\n#include <string.h>')
   common_args += '-DKSUID_HAVE_MEMSET_S=1'
+  # Annex K's memset_s prototype is gated behind __STDC_WANT_LIB_EXT1__
+  # on every libc that provides it (glibc, Apple libc, MUSL...).
+  # Defining the opt-in macro project-wide -- not just inside wipe.h
+  # -- guarantees it is set before <string.h> is first included by
+  # ANY translation unit, since wipe.h cannot rewind a prior include
+  # that already pulled <string.h> in without the opt-in.
+  common_args += '-D__STDC_WANT_LIB_EXT1__=1'
   wipe_backend = 'memset_s (C11 Annex K)'
 endif
 summary({'wipe backend' : wipe_backend}, section : 'libksuid')