Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
861 changes: 173 additions & 688 deletions numpower.c

Large diffs are not rendered by default.

166 changes: 166 additions & 0 deletions src/ndarray_types.c
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,172 @@ int ndarray_fp128_isnan(ndarray_fp128_t a) {
return isnan((double)a);
# endif
}

/* ── Transcendental `__float128` helpers ──────────────────────────────────
Out-of-line wrappers around libquadmath's `expq`/`exp2q`/`expm1q`/`logq`/
`log1pq`/`log2q`/`log10q`/`logbq` so the libquadmath-vs-long-double
fallback choice is resolved once in this translation unit (where
`HAVE_QUADMATH` and `config.h` are in scope). Without these wrappers the
choice would re-resolve at every header include site and pick the
`long double` fallback whenever the includer pulled `ndarray_types.h`
via a transitive header before its own `#include "config.h"`. */

/**
* @brief `exp(a)` at the highest precision the platform offers.
*
* Uses `expq` from libquadmath when available (full 113-bit fp128). Falls
* back to `expl((long double)a)` (~64 bits) when libquadmath is absent —
* the same fallback contract used for `ndarray_fp128_sqrt`.
*
* @param[in] a Input.
* @return `e^a` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_exp(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return expq(a);
# else
return (ndarray_fp128_t)expl((long double)a);
# endif
}

/**
* @brief `2^a` at fp128 precision.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input.
* @return `2^a` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_exp2(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return exp2q(a);
# else
return (ndarray_fp128_t)exp2l((long double)a);
# endif
}

/**
* @brief `e^a − 1` at fp128 precision, preserving precision near zero.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input.
* @return `e^a - 1` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_expm1(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return expm1q(a);
# else
return (ndarray_fp128_t)expm1l((long double)a);
# endif
}

/**
* @brief Natural logarithm `ln(a)` at fp128 precision.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input (positive for finite output).
* @return `ln(a)` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_log(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return logq(a);
# else
return (ndarray_fp128_t)logl((long double)a);
# endif
}

/**
* @brief `ln(1 + a)` at fp128 precision, preserving precision near zero.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input (greater than -1 for finite output).
* @return `ln(1 + a)` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_log1p(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return log1pq(a);
# else
return (ndarray_fp128_t)log1pl((long double)a);
# endif
}

/**
* @brief Base-2 logarithm `log₂(a)` at fp128 precision.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input (positive for finite output).
* @return `log₂(a)` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_log2(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return log2q(a);
# else
return (ndarray_fp128_t)log2l((long double)a);
# endif
}

/**
* @brief Base-10 logarithm `log₁₀(a)` at fp128 precision.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input (positive for finite output).
* @return `log₁₀(a)` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_log10(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return log10q(a);
# else
return (ndarray_fp128_t)log10l((long double)a);
# endif
}

/**
* @brief `logb(a)` at fp128 precision: floor(log2(|a|)) for normals.
* @see `ndarray_fp128_exp` for the libquadmath fallback contract.
* @param[in] a Input.
* @return `logb(a)` in the native fp128 storage.
*/
ndarray_fp128_t ndarray_fp128_logb(ndarray_fp128_t a) {
# if HAVE_QUADMATH
return logbq(a);
# else
return (ndarray_fp128_t)logbl((long double)a);
# endif
}

/* ── Trigonometric / hyperbolic / rounding helpers ─────────────────────────
Same out-of-line wrapper pattern as the exp/log family: the
libquadmath choice is resolved once in this translation unit (where
`HAVE_QUADMATH` is in scope) instead of at every header-include site.
For each, the body is `if HAVE_QUADMATH return Xq(a); else return
(ndarray_fp128_t)Xl((long double)a);`. A single macro `NDARRAY_FP128_LIBM_WRAPPER`
picks the right call by expanding the `#if HAVE_QUADMATH` inside the
macro body — call sites pass both names (`Xq`, `Xl`) once and the
unused branch is preprocessed away. Function-level Doxygen lives on
the prototypes in `ndarray_types.h`. */
#define NDARRAY_FP128_LIBM_WRAPPER(NAME, Q_FN, L_FN) \
ndarray_fp128_t NAME(ndarray_fp128_t a) { \
return \
Q_FN(a); \
}
#if !HAVE_QUADMATH
# undef NDARRAY_FP128_LIBM_WRAPPER
# define NDARRAY_FP128_LIBM_WRAPPER(NAME, Q_FN, L_FN) \
ndarray_fp128_t NAME(ndarray_fp128_t a) { \
return (ndarray_fp128_t)L_FN((long double)a); \
}
#endif

NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_cos, cosq, cosl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_tan, tanq, tanl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arcsin, asinq, asinl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arccos, acosq, acosl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arctan, atanq, atanl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_sinh, sinhq, sinhl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_cosh, coshq, coshl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_tanh, tanhq, tanhl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arcsinh, asinhq, asinhl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arccosh, acoshq, acoshl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_arctanh, atanhq, atanhl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_rint, rintq, rintl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_trunc, truncq, truncl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_floor, floorq, floorl)
NDARRAY_FP128_LIBM_WRAPPER(ndarray_fp128_ceil, ceilq, ceill)

#undef NDARRAY_FP128_LIBM_WRAPPER
#endif /* NDARRAY_HAVE_FLOAT128 */

ndarray_fp128_t ndarray_double_to_fp128(double val) {
Expand Down
78 changes: 78 additions & 0 deletions src/ndarray_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,58 @@ uint16_t ndarray_double_to_fp16(double val);
`#include "config.h"`. */
ndarray_fp128_t ndarray_fp128_sqrt(ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_sin (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_exp (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_exp2 (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_expm1(ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_log (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_log1p(ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_log2 (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_log10(ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_logb (ndarray_fp128_t a);
/* Trigonometric and hyperbolic — sinq/cosq/...q from libquadmath
when present, otherwise long-double libm fallback. */
ndarray_fp128_t ndarray_fp128_cos (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_tan (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_arcsin (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_arccos (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_arctan (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_sinh (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_cosh (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_tanh (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_arcsinh(ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_arccosh(ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_arctanh(ndarray_fp128_t a);
/* Rounding — rintq/truncq/floorq/ceilq from libquadmath. */
ndarray_fp128_t ndarray_fp128_rint (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_trunc (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_floor (ndarray_fp128_t a);
ndarray_fp128_t ndarray_fp128_ceil (ndarray_fp128_t a);
int ndarray_fp128_isnan(ndarray_fp128_t a);
# define NDARRAY_FP128_SQRT(a) ndarray_fp128_sqrt(a)
# define NDARRAY_FP128_SIN(a) ndarray_fp128_sin(a)
# define NDARRAY_FP128_EXP(a) ndarray_fp128_exp(a)
# define NDARRAY_FP128_EXP2(a) ndarray_fp128_exp2(a)
# define NDARRAY_FP128_EXPM1(a) ndarray_fp128_expm1(a)
# define NDARRAY_FP128_LOG(a) ndarray_fp128_log(a)
# define NDARRAY_FP128_LOG1P(a) ndarray_fp128_log1p(a)
# define NDARRAY_FP128_LOG2(a) ndarray_fp128_log2(a)
# define NDARRAY_FP128_LOG10(a) ndarray_fp128_log10(a)
# define NDARRAY_FP128_LOGB(a) ndarray_fp128_logb(a)
# define NDARRAY_FP128_COS(a) ndarray_fp128_cos(a)
# define NDARRAY_FP128_TAN(a) ndarray_fp128_tan(a)
# define NDARRAY_FP128_ARCSIN(a) ndarray_fp128_arcsin(a)
# define NDARRAY_FP128_ARCCOS(a) ndarray_fp128_arccos(a)
# define NDARRAY_FP128_ARCTAN(a) ndarray_fp128_arctan(a)
# define NDARRAY_FP128_SINH(a) ndarray_fp128_sinh(a)
# define NDARRAY_FP128_COSH(a) ndarray_fp128_cosh(a)
# define NDARRAY_FP128_TANH(a) ndarray_fp128_tanh(a)
# define NDARRAY_FP128_ARCSINH(a) ndarray_fp128_arcsinh(a)
# define NDARRAY_FP128_ARCCOSH(a) ndarray_fp128_arccosh(a)
# define NDARRAY_FP128_ARCTANH(a) ndarray_fp128_arctanh(a)
# define NDARRAY_FP128_RINT(a) ndarray_fp128_rint(a)
# define NDARRAY_FP128_TRUNC(a) ndarray_fp128_trunc(a)
# define NDARRAY_FP128_FLOOR(a) ndarray_fp128_floor(a)
# define NDARRAY_FP128_CEIL(a) ndarray_fp128_ceil(a)
# define NDARRAY_FP128_ISNAN(a) ndarray_fp128_isnan(a)
# define NDARRAY_FP128_FROM_D(d) ((ndarray_fp128_t)(d))
# define NDARRAY_FP128_FROM_LD(ld) ((ndarray_fp128_t)(ld))
Expand Down Expand Up @@ -88,6 +137,35 @@ uint16_t ndarray_double_to_fp16(double val);
# define NDARRAY_FP128_ABS(a) ndarray_dd_abs(a)
# define NDARRAY_FP128_SQRT(a) ndarray_dd_sqrt(a)
# define NDARRAY_FP128_SIN(a) ndarray_dd_from_double(sin(ndarray_dd_to_double(a)))
/* Transcendental fp128 ops on the DD backend route through `double`
so the macro contract stays platform-portable; accuracy is the same
tier as the GPU `dd_*` reference path that already promotes
`dd → double → libm → dd` for sin/cos/exp/log. Linux GCC x86-64 with
libquadmath is the only configuration that yields full 113-bit
transcendentals — every other platform tops out at fp64 here. */
# define NDARRAY_FP128_EXP(a) ndarray_dd_from_double(exp(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_EXP2(a) ndarray_dd_from_double(exp2(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_EXPM1(a) ndarray_dd_from_double(expm1(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_LOG(a) ndarray_dd_from_double(log(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_LOG1P(a) ndarray_dd_from_double(log1p(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_LOG2(a) ndarray_dd_from_double(log2(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_LOG10(a) ndarray_dd_from_double(log10(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_LOGB(a) ndarray_dd_from_double(logb(ndarray_dd_to_double(a)))
# define NDARRAY_FP128_COS(a) ndarray_dd_from_double(cos (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_TAN(a) ndarray_dd_from_double(tan (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_ARCSIN(a) ndarray_dd_from_double(asin (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_ARCCOS(a) ndarray_dd_from_double(acos (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_ARCTAN(a) ndarray_dd_from_double(atan (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_SINH(a) ndarray_dd_from_double(sinh (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_COSH(a) ndarray_dd_from_double(cosh (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_TANH(a) ndarray_dd_from_double(tanh (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_ARCSINH(a) ndarray_dd_from_double(asinh (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_ARCCOSH(a) ndarray_dd_from_double(acosh (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_ARCTANH(a) ndarray_dd_from_double(atanh (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_RINT(a) ndarray_dd_from_double(rint (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_TRUNC(a) ndarray_dd_from_double(trunc (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_FLOOR(a) ndarray_dd_from_double(floor (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_CEIL(a) ndarray_dd_from_double(ceil (ndarray_dd_to_double(a)))
# define NDARRAY_FP128_EQ(a, b) (ndarray_dd_cmp((a), (b)) == 0)
# define NDARRAY_FP128_LT(a, b) (ndarray_dd_cmp((a), (b)) < 0)
# define NDARRAY_FP128_ISZERO(a) ndarray_dd_iszero(a)
Expand Down
Loading
Loading