Skip to content

Commit 997073c

Browse files
authored
Sumprod(): Update citation. Reorder functions. Add final twosum() call. Improve comments. (#101249)
1 parent 79af40a commit 997073c

File tree

1 file changed

+21
-38
lines changed

1 file changed

+21
-38
lines changed

Modules/mathmodule.c

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,34 +2833,20 @@ long_add_would_overflow(long a, long b)
28332833

28342834
/*
28352835
Double and triple length extended precision floating point arithmetic
2836-
based on ideas from three sources:
2837-
2838-
Improved Kahan–Babuška algorithm by Arnold Neumaier
2839-
https://www.mat.univie.ac.at/~neum/scan/01.pdf
2836+
based on:
28402837
28412838
A Floating-Point Technique for Extending the Available Precision
28422839
by T. J. Dekker
28432840
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
28442841
2845-
Ultimately Fast Accurate Summation by Siegfried M. Rump
2846-
https://www.tuhh.de/ti3/paper/rump/Ru08b.pdf
2847-
2848-
Double length functions:
2849-
* dl_split() exact split of a C double into two half precision components.
2850-
* dl_mul() exact multiplication of two C doubles.
2851-
2852-
Triple length functions and constant:
2853-
* tl_zero is a triple length zero for starting or resetting an accumulation.
2854-
* tl_add() compensated addition of a C double to a triple length number.
2855-
* tl_fma() performs a triple length fused-multiply-add.
2856-
* tl_to_d() converts from triple length number back to a C double.
2842+
Accurate Sum and Dot Product
2843+
by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
2844+
https://doi.org/10.1137/030601818
2845+
https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
28572846
28582847
*/
28592848

28602849
typedef struct{ double hi; double lo; } DoubleLength;
2861-
typedef struct{ double hi; double lo; double tiny; } TripleLength;
2862-
2863-
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
28642850

28652851
static inline DoubleLength
28662852
twosum(double a, double b)
@@ -2874,25 +2860,9 @@ twosum(double a, double b)
28742860
return (DoubleLength) {s, t};
28752861
}
28762862

2877-
static inline TripleLength
2878-
tl_add(TripleLength total, double x)
2879-
{
2880-
/* Input: x total.hi total.lo total.tiny
2881-
|--- twosum ---|
2882-
s.hi s.lo
2883-
|--- twosum ---|
2884-
t.hi t.lo
2885-
|--- single sum ---|
2886-
Output: s.hi t.hi tiny
2887-
*/
2888-
DoubleLength s = twosum(x, total.hi);
2889-
DoubleLength t = twosum(s.lo, total.lo);
2890-
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
2891-
}
2892-
28932863
static inline DoubleLength
28942864
dl_split(double x) {
2895-
double t = x * 134217729.0; /* Veltkamp constant = float(0x8000001) */
2865+
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
28962866
double hi = t - (t - x);
28972867
double lo = x - hi;
28982868
return (DoubleLength) {hi, lo};
@@ -2911,6 +2881,18 @@ dl_mul(double x, double y)
29112881
return (DoubleLength) {z, zz};
29122882
}
29132883

2884+
typedef struct{ double hi; double lo; double tiny; } TripleLength;
2885+
2886+
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
2887+
2888+
static inline TripleLength
2889+
tl_add(TripleLength total, double x)
2890+
{
2891+
DoubleLength s = twosum(x, total.hi);
2892+
DoubleLength t = twosum(s.lo, total.lo);
2893+
return (TripleLength) {s.hi, t.hi, t.lo + total.tiny};
2894+
}
2895+
29142896
static inline TripleLength
29152897
tl_fma(TripleLength total, double p, double q)
29162898
{
@@ -2922,7 +2904,8 @@ tl_fma(TripleLength total, double p, double q)
29222904
static inline double
29232905
tl_to_d(TripleLength total)
29242906
{
2925-
return total.tiny + total.lo + total.hi;
2907+
DoubleLength last = twosum(total.lo, total.hi);
2908+
return total.tiny + last.lo + last.hi;
29262909
}
29272910

29282911
/*[clinic input]
@@ -3039,7 +3022,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
30393022
}
30403023

30413024
finalize_int_path:
3042-
// # We're finished, overflowed, or have a non-int
3025+
// We're finished, overflowed, or have a non-int
30433026
int_path_enabled = false;
30443027
if (int_total_in_use) {
30453028
term_i = PyLong_FromLong(int_total);

0 commit comments

Comments
 (0)