RubixML · SkibidiProduction · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/numpower.c b/numpower.c
diff --git a/src/ndarray_types.c b/src/ndarray_types.c
@@ -279,6 +279,172 @@ int ndarray_fp128_isnan(ndarray_fp128_t a) {
     return isnan((double)a);
 #  endif
 }
+
+/* ── Transcendental `__float128` helpers ──────────────────────────────────
+   Out-of-line wrappers around libquadmath's `expq`/`exp2q`/`expm1q`/`logq`/
+   `log1pq`/`log2q`/`log10q`/`logbq` so the libquadmath-vs-long-double
+   fallback choice is resolved once in this translation unit (where
+   `HAVE_QUADMATH` and `config.h` are in scope). Without these wrappers the
+   choice would re-resolve at every header include site and pick the
+   `long double` fallback whenever the includer pulled `ndarray_types.h`
+   via a transitive header before its own `#include "config.h"`. */
+
+/**
+ * @brief `exp(a)` at the highest precision the platform offers.
+ *
+ * Uses `expq` from libquadmath when available (full 113-bit fp128). Falls
+ * back to `expl((long double)a)` (~64 bits) when libquadmath is absent —
+ * the same fallback contract used for `ndarray_fp128_sqrt`.
+ *
+ * @param[in] a Input.
+ * @return `e^a` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_exp(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return expq(a);
+#  else
+    return (ndarray_fp128_t)expl((long double)a);
+#  endif
+}
+
+/**
+ * @brief `2^a` at fp128 precision.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input.
+ * @return `2^a` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_exp2(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return exp2q(a);
+#  else
+    return (ndarray_fp128_t)exp2l((long double)a);
+#  endif
+}
+
+/**
+ * @brief `e^a − 1` at fp128 precision, preserving precision near zero.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input.
+ * @return `e^a - 1` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_expm1(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return expm1q(a);
+#  else
+    return (ndarray_fp128_t)expm1l((long double)a);
+#  endif
+}
+
+/**
+ * @brief Natural logarithm `ln(a)` at fp128 precision.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input (positive for finite output).
+ * @return `ln(a)` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_log(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return logq(a);
+#  else
+    return (ndarray_fp128_t)logl((long double)a);
+#  endif
+}
+
+/**
+ * @brief `ln(1 + a)` at fp128 precision, preserving precision near zero.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input (greater than -1 for finite output).
+ * @return `ln(1 + a)` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_log1p(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return log1pq(a);
+#  else
+    return (ndarray_fp128_t)log1pl((long double)a);
+#  endif
+}
+
+/**
+ * @brief Base-2 logarithm `log₂(a)` at fp128 precision.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input (positive for finite output).
+ * @return `log₂(a)` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_log2(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return log2q(a);
+#  else
+    return (ndarray_fp128_t)log2l((long double)a);
+#  endif
+}
+
+/**
+ * @brief Base-10 logarithm `log₁₀(a)` at fp128 precision.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input (positive for finite output).
+ * @return `log₁₀(a)` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_log10(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return log10q(a);
+#  else
+    return (ndarray_fp128_t)log10l((long double)a);
+#  endif
+}
+
+/**
+ * @brief `logb(a)` at fp128 precision: floor(log2(|a|)) for normals.
+ * @see `ndarray_fp128_exp` for the libquadmath fallback contract.
+ * @param[in] a Input.
+ * @return `logb(a)` in the native fp128 storage.
+ */
+ndarray_fp128_t ndarray_fp128_logb(ndarray_fp128_t a) {
+#  if HAVE_QUADMATH
+    return logbq(a);
+#  else
+    return (ndarray_fp128_t)logbl((long double)a);
+#  endif
+}
+
+/* ── Trigonometric / hyperbolic / rounding helpers ─────────────────────────
+   Same out-of-line wrapper pattern as the exp/log family: the
+   libquadmath choice is resolved once in this translation unit (where
+   `HAVE_QUADMATH` is in scope) instead of at every header-include site.
+   For each, the body is `if HAVE_QUADMATH return Xq(a); else return
+   (ndarray_fp128_t)Xl((long double)a);`. A single macro `NDARRAY_FP128_LIBM_WRAPPER`
+   picks the right call by expanding the `#if HAVE_QUADMATH` inside the
+   macro body — call sites pass both names (`Xq`, `Xl`) once and the
+   unused branch is preprocessed away. Function-level Doxygen lives on
+   the prototypes in `ndarray_types.h`. */
+#define NDARRAY_FP128_LIBM_WRAPPER(NAME, Q_FN, L_FN)                              \
+    ndarray_fp128_t NAME(ndarray_fp128_t a) {                                     \
+        return                                                                    \
+                Q_FN(a);                                                          \
+    }
+#if !HAVE_QUADMATH
+#  undef  NDARRAY_FP128_LIBM_WRAPPER
+#  define NDARRAY_FP128_LIBM_WRAPPER(NAME, Q_FN, L_FN)                            \
+    ndarray_fp128_t NAME(ndarray_fp128_t a) {                                     \
+        return (ndarray_fp128_t)L_FN((long double)a);                             \
+    }
+#endif
+
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_cos,     cosq,     cosl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_tan,     tanq,     tanl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arcsin,  asinq,    asinl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arccos,  acosq,    acosl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arctan,  atanq,    atanl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_sinh,    sinhq,    sinhl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_cosh,    coshq,    coshl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_tanh,    tanhq,    tanhl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arcsinh, asinhq,   asinhl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arccosh, acoshq,   acoshl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arctanh, atanhq,   atanhl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_rint,    rintq,    rintl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_trunc,   truncq,   truncl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_floor,   floorq,   floorl)
+NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_ceil,    ceilq,    ceill)
+
+#undef NDARRAY_FP128_LIBM_WRAPPER
 #endif /* NDARRAY_HAVE_FLOAT128 */
 
 ndarray_fp128_t ndarray_double_to_fp128(double val) {

diff --git a/src/ndarray_types.h b/src/ndarray_types.h
@@ -52,9 +52,58 @@ uint16_t ndarray_double_to_fp16(double val);
       `#include "config.h"`. */
    ndarray_fp128_t ndarray_fp128_sqrt(ndarray_fp128_t a);
    ndarray_fp128_t ndarray_fp128_sin (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_exp  (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_exp2 (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_expm1(ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_log  (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_log1p(ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_log2 (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_log10(ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_logb (ndarray_fp128_t a);
+   /* Trigonometric and hyperbolic — sinq/cosq/...q from libquadmath
+      when present, otherwise long-double libm fallback. */
+   ndarray_fp128_t ndarray_fp128_cos    (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_tan    (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_arcsin (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_arccos (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_arctan (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_sinh   (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_cosh   (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_tanh   (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_arcsinh(ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_arccosh(ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_arctanh(ndarray_fp128_t a);
+   /* Rounding — rintq/truncq/floorq/ceilq from libquadmath. */
+   ndarray_fp128_t ndarray_fp128_rint   (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_trunc  (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_floor  (ndarray_fp128_t a);
+   ndarray_fp128_t ndarray_fp128_ceil   (ndarray_fp128_t a);
    int             ndarray_fp128_isnan(ndarray_fp128_t a);
 #  define NDARRAY_FP128_SQRT(a)     ndarray_fp128_sqrt(a)
 #  define NDARRAY_FP128_SIN(a)      ndarray_fp128_sin(a)
+#  define NDARRAY_FP128_EXP(a)      ndarray_fp128_exp(a)
+#  define NDARRAY_FP128_EXP2(a)     ndarray_fp128_exp2(a)
+#  define NDARRAY_FP128_EXPM1(a)    ndarray_fp128_expm1(a)
+#  define NDARRAY_FP128_LOG(a)      ndarray_fp128_log(a)
+#  define NDARRAY_FP128_LOG1P(a)    ndarray_fp128_log1p(a)
+#  define NDARRAY_FP128_LOG2(a)     ndarray_fp128_log2(a)
+#  define NDARRAY_FP128_LOG10(a)    ndarray_fp128_log10(a)
+#  define NDARRAY_FP128_LOGB(a)     ndarray_fp128_logb(a)
+#  define NDARRAY_FP128_COS(a)      ndarray_fp128_cos(a)
+#  define NDARRAY_FP128_TAN(a)      ndarray_fp128_tan(a)
+#  define NDARRAY_FP128_ARCSIN(a)   ndarray_fp128_arcsin(a)
+#  define NDARRAY_FP128_ARCCOS(a)   ndarray_fp128_arccos(a)
+#  define NDARRAY_FP128_ARCTAN(a)   ndarray_fp128_arctan(a)
+#  define NDARRAY_FP128_SINH(a)     ndarray_fp128_sinh(a)
+#  define NDARRAY_FP128_COSH(a)     ndarray_fp128_cosh(a)
+#  define NDARRAY_FP128_TANH(a)     ndarray_fp128_tanh(a)
+#  define NDARRAY_FP128_ARCSINH(a)  ndarray_fp128_arcsinh(a)
+#  define NDARRAY_FP128_ARCCOSH(a)  ndarray_fp128_arccosh(a)
+#  define NDARRAY_FP128_ARCTANH(a)  ndarray_fp128_arctanh(a)
+#  define NDARRAY_FP128_RINT(a)     ndarray_fp128_rint(a)
+#  define NDARRAY_FP128_TRUNC(a)    ndarray_fp128_trunc(a)
+#  define NDARRAY_FP128_FLOOR(a)    ndarray_fp128_floor(a)
+#  define NDARRAY_FP128_CEIL(a)     ndarray_fp128_ceil(a)
 #  define NDARRAY_FP128_ISNAN(a)    ndarray_fp128_isnan(a)
 #  define NDARRAY_FP128_FROM_D(d)   ((ndarray_fp128_t)(d))
 #  define NDARRAY_FP128_FROM_LD(ld) ((ndarray_fp128_t)(ld))
@@ -88,6 +137,35 @@ uint16_t ndarray_double_to_fp16(double val);
 #  define NDARRAY_FP128_ABS(a)      ndarray_dd_abs(a)
 #  define NDARRAY_FP128_SQRT(a)     ndarray_dd_sqrt(a)
 #  define NDARRAY_FP128_SIN(a)      ndarray_dd_from_double(sin(ndarray_dd_to_double(a)))
+   /* Transcendental fp128 ops on the DD backend route through `double`
+      so the macro contract stays platform-portable; accuracy is the same
+      tier as the GPU `dd_*` reference path that already promotes
+      `dd → double → libm → dd` for sin/cos/exp/log. Linux GCC x86-64 with
+      libquadmath is the only configuration that yields full 113-bit
+      transcendentals — every other platform tops out at fp64 here. */
+#  define NDARRAY_FP128_EXP(a)      ndarray_dd_from_double(exp(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_EXP2(a)     ndarray_dd_from_double(exp2(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_EXPM1(a)    ndarray_dd_from_double(expm1(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_LOG(a)      ndarray_dd_from_double(log(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_LOG1P(a)    ndarray_dd_from_double(log1p(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_LOG2(a)     ndarray_dd_from_double(log2(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_LOG10(a)    ndarray_dd_from_double(log10(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_LOGB(a)     ndarray_dd_from_double(logb(ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_COS(a)      ndarray_dd_from_double(cos    (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_TAN(a)      ndarray_dd_from_double(tan    (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_ARCSIN(a)   ndarray_dd_from_double(asin   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_ARCCOS(a)   ndarray_dd_from_double(acos   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_ARCTAN(a)   ndarray_dd_from_double(atan   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_SINH(a)     ndarray_dd_from_double(sinh   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_COSH(a)     ndarray_dd_from_double(cosh   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_TANH(a)     ndarray_dd_from_double(tanh   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_ARCSINH(a)  ndarray_dd_from_double(asinh  (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_ARCCOSH(a)  ndarray_dd_from_double(acosh  (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_ARCTANH(a)  ndarray_dd_from_double(atanh  (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_RINT(a)     ndarray_dd_from_double(rint   (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_TRUNC(a)    ndarray_dd_from_double(trunc  (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_FLOOR(a)    ndarray_dd_from_double(floor  (ndarray_dd_to_double(a)))
+#  define NDARRAY_FP128_CEIL(a)     ndarray_dd_from_double(ceil   (ndarray_dd_to_double(a)))
 #  define NDARRAY_FP128_EQ(a, b)    (ndarray_dd_cmp((a), (b)) == 0)
 #  define NDARRAY_FP128_LT(a, b)    (ndarray_dd_cmp((a), (b)) <  0)
 #  define NDARRAY_FP128_ISZERO(a)   ndarray_dd_iszero(a)