
Commit 7d14105

Use libc's lgamma/tgamma instead of custom implementations
1 parent d202b5c commit 7d14105

1 file changed
+12 -264 lines changed

Modules/mathmodule.c
+12 -264

@@ -92,16 +92,6 @@ get_math_module_state(PyObject *module)
     return (math_module_state *)state;
 }
 
-/*
-   sin(pi*x), giving accurate results for all finite x (especially x
-   integral or close to an integer). This is here for use in the
-   reflection formula for the gamma function. It conforms to IEEE
-   754-2008 for finite arguments, but not for infinities or nans.
-*/
-
-static const double pi = 3.141592653589793238462643383279502884197;
-static const double logpi = 1.144729885849400174143427351353058711647;
-
 /* Version of PyFloat_AsDouble() with in-line fast paths
    for exact floats and integers. Gives a substantial
    speed improvement for extracting float arguments.
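
Note (not part of the commit): the comment deleted above explains why a dedicated sin(pi*x) helper existed at all. Evaluating sin(pi*x) naively loses accuracy when x is at or near an integer, because pi*x is rounded before sin() ever sees it; the removed m_sinpi() (deleted in the next hunk) reduced the argument first. A minimal stand-alone illustration, assuming IEEE 754 doubles and an ordinary libm:

/* Illustration only: naive sin(pi*x) versus a reduced argument, near an
 * integer.  Mathematically sin(6*pi) == 0, but the double `pi` is not the
 * true pi, so sin(pi*6.0) comes back as a tiny nonzero value (roughly
 * -7e-16 on typical systems), while the reduced form is exactly 0.0. */
#include <math.h>
#include <stdio.h>

int main(void)
{
    const double pi = 3.141592653589793238462643383279502884197;
    double x = 6.0;
    printf("naive   sin(pi*x)         = %.17g\n", sin(pi * x));
    printf("reduced sin(pi*(x - 6.0)) = %.17g\n", sin(pi * (x - 6.0)));
    return 0;
}
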
@@ -124,162 +114,6 @@ static const double logpi = 1.144729885849400174143427351353058711647;
         } \
     }
 
-static double
-m_sinpi(double x)
-{
-    double y, r;
-    int n;
-    /* this function should only ever be called for finite arguments */
-    assert(Py_IS_FINITE(x));
-    y = fmod(fabs(x), 2.0);
-    n = (int)round(2.0*y);
-    assert(0 <= n && n <= 4);
-    switch (n) {
-    case 0:
-        r = sin(pi*y);
-        break;
-    case 1:
-        r = cos(pi*(y-0.5));
-        break;
-    case 2:
-        /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
-           -0.0 instead of 0.0 when y == 1.0. */
-        r = sin(pi*(1.0-y));
-        break;
-    case 3:
-        r = -cos(pi*(y-1.5));
-        break;
-    case 4:
-        r = sin(pi*(y-2.0));
-        break;
-    default:
-        Py_UNREACHABLE();
-    }
-    return copysign(1.0, x)*r;
-}
-
-/* Implementation of the real gamma function. In extensive but non-exhaustive
-   random tests, this function proved accurate to within <= 10 ulps across the
-   entire float domain. Note that accuracy may depend on the quality of the
-   system math functions, the pow function in particular. Special cases
-   follow C99 annex F. The parameters and method are tailored to platforms
-   whose double format is the IEEE 754 binary64 format.
-
-   Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
-   and g=6.024680040776729583740234375; these parameters are amongst those
-   used by the Boost library. Following Boost (again), we re-express the
-   Lanczos sum as a rational function, and compute it that way. The
-   coefficients below were computed independently using MPFR, and have been
-   double-checked against the coefficients in the Boost source code.
-
-   For x < 0.0 we use the reflection formula.
-
-   There's one minor tweak that deserves explanation: Lanczos' formula for
-   Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
-   values, x+g-0.5 can be represented exactly. However, in cases where it
-   can't be represented exactly the small error in x+g-0.5 can be magnified
-   significantly by the pow and exp calls, especially for large x. A cheap
-   correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
-   involved in the computation of x+g-0.5 (that is, e = computed value of
-   x+g-0.5 - exact value of x+g-0.5). Here's the proof:
-
-   Correction factor
-   -----------------
-   Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
-   double, and e is tiny. Then:
-
-     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
-                                     = pow(y, x-0.5)/exp(y) * C,
-
-   where the correction_factor C is given by
-
-     C = pow(1-e/y, x-0.5) * exp(e)
-
-   Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
-
-     C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
-
-   But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
-
-     pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
-
-   Note that for accuracy, when computing r*C it's better to do
-
-     r + e*g/y*r;
-
-   than
-
-     r * (1 + e*g/y);
-
-   since the addition in the latter throws away most of the bits of
-   information in e*g/y.
-*/
-
-#define LANCZOS_N 13
-static const double lanczos_g = 6.024680040776729583740234375;
-static const double lanczos_g_minus_half = 5.524680040776729583740234375;
-static const double lanczos_num_coeffs[LANCZOS_N] = {
-    23531376880.410759688572007674451636754734846804940,
-    42919803642.649098768957899047001988850926355848959,
-    35711959237.355668049440185451547166705960488635843,
-    17921034426.037209699919755754458931112671403265390,
-    6039542586.3520280050642916443072979210699388420708,
-    1439720407.3117216736632230727949123939715485786772,
-    248874557.86205415651146038641322942321632125127801,
-    31426415.585400194380614231628318205362874684987640,
-    2876370.6289353724412254090516208496135991145378768,
-    186056.26539522349504029498971604569928220784236328,
-    8071.6720023658162106380029022722506138218516325024,
-    210.82427775157934587250973392071336271166969580291,
-    2.5066282746310002701649081771338373386264310793408
-};
-
-/* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
-static const double lanczos_den_coeffs[LANCZOS_N] = {
-    0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
-    13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
-
-/* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
-#define NGAMMA_INTEGRAL 23
-static const double gamma_integral[NGAMMA_INTEGRAL] = {
-    1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
-    3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
-    1307674368000.0, 20922789888000.0, 355687428096000.0,
-    6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
-    51090942171709440000.0, 1124000727777607680000.0,
-};
-
-/* Lanczos' sum L_g(x), for positive x */
-
-static double
-lanczos_sum(double x)
-{
-    double num = 0.0, den = 0.0;
-    int i;
-    assert(x > 0.0);
-    /* evaluate the rational function lanczos_sum(x). For large
-       x, the obvious algorithm risks overflow, so we instead
-       rescale the denominator and numerator of the rational
-       function by x**(1-LANCZOS_N) and treat this as a
-       rational function in 1/x. This also reduces the error for
-       larger x values. The choice of cutoff point (5.0 below) is
-       somewhat arbitrary; in tests, smaller cutoff values than
-       this resulted in lower accuracy. */
-    if (x < 5.0) {
-        for (i = LANCZOS_N; --i >= 0; ) {
-            num = num * x + lanczos_num_coeffs[i];
-            den = den * x + lanczos_den_coeffs[i];
-        }
-    }
-    else {
-        for (i = 0; i < LANCZOS_N; i++) {
-            num = num / x + lanczos_num_coeffs[i];
-            den = den / x + lanczos_den_coeffs[i];
-        }
-    }
-    return num/den;
-}
-
 /* Constant for +infinity, generated in the same way as float('inf'). */
 
 static double
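
For reference (a restatement for readers, not something the commit adds): the block deleted above implemented the Lanczos approximation for positive arguments and fell back on Euler's reflection formula for negative ones, which is the only reason the pi/logpi constants and m_sinpi() existed. In the notation of the deleted comment, with g = 6.024680040776729583740234375 and L_g the removed rational sum:

% Lanczos form used by the removed code for x > 0:
\Gamma(x) \;\approx\; L_g(x)\,\frac{(x + g - \tfrac12)^{\,x - \frac12}}{\exp(x + g - \tfrac12)}

% Euler's reflection formula, rearranged the way the removed negative-x
% branch used it (hence the need for an accurate sin(pi*x)):
\Gamma(x) \;=\; \frac{-\pi}{\,|x|\,\sin(\pi\,|x|)\,\Gamma(|x|)\,},
\qquad x < 0,\ x \notin \mathbb{Z}
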
@@ -309,113 +143,46 @@ m_nan(void)
 
 #endif
 
+/*
+   gamma: the real gamma function.
+*/
+
 static double
-m_tgamma(double x)
+m_gamma(double x)
 {
-    double absx, r, y, z, sqrtpow;
-
     /* special cases */
     if (!Py_IS_FINITE(x)) {
         if (Py_IS_NAN(x) || x > 0.0)
-            return x; /* tgamma(nan) = nan, tgamma(inf) = inf */
+            return x; /* gamma(nan) = nan, gamma(inf) = inf */
         else {
             errno = EDOM;
-            return Py_NAN; /* tgamma(-inf) = nan, invalid */
+            return Py_NAN; /* gamma(-inf) = nan, invalid */
         }
     }
     if (x == 0.0) {
         errno = EDOM;
-        /* tgamma(+-0.0) = +-inf, divide-by-zero */
+        /* gamma(+-0.0) = +-inf, divide-by-zero */
        return copysign(Py_HUGE_VAL, x);
     }
 
     /* integer arguments */
     if (x == floor(x)) {
         if (x < 0.0) {
-            errno = EDOM; /* tgamma(n) = nan, invalid for */
+            errno = EDOM; /* gamma(n) = nan, invalid for */
             return Py_NAN; /* negative integers n */
         }
-        if (x <= NGAMMA_INTEGRAL)
-            return gamma_integral[(int)x - 1];
-    }
-    absx = fabs(x);
-
-    /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
-    if (absx < 1e-20) {
-        r = 1.0/x;
-        if (Py_IS_INFINITY(r))
-            errno = ERANGE;
-        return r;
-    }
-
-    /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
-       x > 200, and underflows to +-0.0 for x < -200, not a negative
-       integer. */
-    if (absx > 200.0) {
-        if (x < 0.0) {
-            return 0.0/m_sinpi(x);
-        }
-        else {
-            errno = ERANGE;
-            return Py_HUGE_VAL;
-        }
     }
 
-    y = absx + lanczos_g_minus_half;
-    /* compute error in sum */
-    if (absx > lanczos_g_minus_half) {
-        /* note: the correction can be foiled by an optimizing
-           compiler that (incorrectly) thinks that an expression like
-           a + b - a - b can be optimized to 0.0. This shouldn't
-           happen in a standards-conforming compiler. */
-        double q = y - absx;
-        z = q - lanczos_g_minus_half;
-    }
-    else {
-        double q = y - lanczos_g_minus_half;
-        z = q - absx;
-    }
-    z = z * lanczos_g / y;
-    if (x < 0.0) {
-        r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
-        r -= z * r;
-        if (absx < 140.0) {
-            r /= pow(y, absx - 0.5);
-        }
-        else {
-            sqrtpow = pow(y, absx / 2.0 - 0.25);
-            r /= sqrtpow;
-            r /= sqrtpow;
-        }
-    }
-    else {
-        r = lanczos_sum(absx) / exp(y);
-        r += z * r;
-        if (absx < 140.0) {
-            r *= pow(y, absx - 0.5);
-        }
-        else {
-            sqrtpow = pow(y, absx / 2.0 - 0.25);
-            r *= sqrtpow;
-            r *= sqrtpow;
-        }
-    }
-    if (Py_IS_INFINITY(r))
-        errno = ERANGE;
-    return r;
+    return tgamma(x);
 }
 
 /*
    lgamma: natural log of the absolute value of the Gamma function.
-   For large arguments, Lanczos' formula works extremely well here.
 */
 
 static double
 m_lgamma(double x)
 {
-    double r;
-    double absx;
-
     /* special cases */
     if (!Py_IS_FINITE(x)) {
         if (Py_IS_NAN(x))
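
The wrapper above still sets errno itself for the special cases Python documents (gamma(+-0.0), gamma(-inf), negative integers) before handing everything else to the C99 tgamma(). That is deliberate: whether libc's tgamma() reports errors through errno at all depends on math_errhandling. A stand-alone sketch of that behaviour, with arbitrarily chosen inputs, not code from this commit:

/* Sketch: how C99 tgamma() signals problems.  errno is only guaranteed to
 * be set when (math_errhandling & MATH_ERRNO) is nonzero; otherwise errors
 * are reported via floating-point exception flags instead. */
#include <errno.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
    printf("errno reporting: %s\n",
           (math_errhandling & MATH_ERRNO) ? "enabled" : "disabled");

    errno = 0;
    printf("tgamma(5.0)   = %g (errno=%d)\n", tgamma(5.0), errno);   /* 24, no error */

    errno = 0;  /* overflow: doubles run out a little past x = 171.6 */
    printf("tgamma(172.0) = %g (errno=%d)\n", tgamma(172.0), errno);

    errno = 0;  /* pole/domain error at non-positive integers */
    printf("tgamma(-2.0)  = %g (errno=%d)\n", tgamma(-2.0), errno);
    return 0;
}
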
@@ -430,28 +197,9 @@ m_lgamma(double x)
             errno = EDOM; /* lgamma(n) = inf, divide-by-zero for */
             return Py_HUGE_VAL; /* integers n <= 0 */
         }
-        else {
-            return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
-        }
     }
 
-    absx = fabs(x);
-    /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
-    if (absx < 1e-20)
-        return -log(absx);
-
-    /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
-       having a second set of numerator coefficients for lanczos_sum that
-       absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
-       subtraction below; it's probably not worth it. */
-    r = log(lanczos_sum(absx)) - lanczos_g;
-    r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
-    if (x < 0.0)
-        /* Use reflection formula to get value for negative x. */
-        r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
-    if (Py_IS_INFINITY(r))
-        errno = ERANGE;
-    return r;
+    return lgamma(x);
 }
 
 /*
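
As with gamma, the wrapper keeps its own errno handling for non-positive integers and otherwise defers to libc. One property worth remembering when reading the simplified m_lgamma(): lgamma() stays finite long after tgamma() has overflowed, which is the main reason Python exposes both. A small stand-alone sketch (not from the commit); note also that POSIX lgamma() writes the sign of Gamma(x) into the global signgam, with lgamma_r() as the thread-safe variant:

/* Sketch: lgamma() versus tgamma() for a large argument, plus a spot check
 * against a known closed form, Gamma(1/2) = sqrt(pi). */
#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 500.0;
    printf("tgamma(%g) = %g\n", x, tgamma(x));   /* overflows to inf */
    printf("lgamma(%g) = %g\n", x, lgamma(x));   /* about 2605.1, still finite */

    /* lgamma(0.5) should equal log(sqrt(pi)) ~ 0.5723649 */
    printf("lgamma(0.5) = %.10g, log(sqrt(pi)) = %.10g\n",
           lgamma(0.5), log(sqrt(acos(-1.0))));
    return 0;
}
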
@@ -1159,7 +907,7 @@ math_floor(PyObject *module, PyObject *number)
     return PyLong_FromDouble(floor(x));
 }
 
-FUNC1A(gamma, m_tgamma,
+FUNC1A(gamma, m_gamma,
        "gamma($module, x, /)\n--\n\n"
        "Gamma function at x.")
 FUNC1A(lgamma, m_lgamma,

0 commit comments