
Commit 9c6fcb5

Split Int into Int and MinInt
`MinInt` contains the basic methods that are only needed by integers involved in widening operations, i.e. big integers. `Int` retains all other operations and convenience methods.
1 parent 7f33201 commit 9c6fcb5
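
Loosely, the split could be pictured as the following sketch. It is a minimal, hypothetical outline, not the crate's actual definitions: the associated items shown (`UnsignedInt`, `BITS`, `ZERO`, `ONE`, `MAX`) are taken from identifiers that appear in the diffs below, and everything else is an assumption.

```rust
// Hedged sketch of the trait split, assuming the identifiers used in the diffs below.
pub(crate) trait MinInt: Copy {
    /// Unsigned integer of the same width, used by widening operations.
    type UnsignedInt: MinInt;

    const BITS: u32;
    const ZERO: Self;
    const ONE: Self;
    const MAX: Self;
}

// `Int` keeps the remaining arithmetic and convenience methods on top of `MinInt`.
pub(crate) trait Int: MinInt + PartialOrd {
    fn unsigned(self) -> Self::UnsignedInt;
    fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
    fn wrapping_add(self, other: Self) -> Self;
    fn wrapping_sub(self, other: Self) -> Self;
    // ...remaining convenience methods stay here.
}
```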

File tree

12 files changed: +210 -172 lines changed


Diff for: src/float/add.rs (+11 -11)

@@ -1,5 +1,5 @@
 use crate::float::Float;
-use crate::int::{CastInto, Int};
+use crate::int::{CastInto, Int, MinInt};
 
 /// Returns `a + b`
 fn add<F: Float>(a: F, b: F) -> F
@@ -57,17 +57,17 @@ where
         }
 
         // zero + anything = anything
-        if a_abs == Int::ZERO {
+        if a_abs == MinInt::ZERO {
             // but we need to get the sign right for zero + zero
-            if b_abs == Int::ZERO {
+            if b_abs == MinInt::ZERO {
                 return F::from_repr(a.repr() & b.repr());
             } else {
                 return b;
             }
         }
 
         // anything + zero = anything
-        if b_abs == Int::ZERO {
+        if b_abs == MinInt::ZERO {
             return a;
         }
     }
@@ -113,10 +113,10 @@ where
     // Shift the significand of b by the difference in exponents, with a sticky
     // bottom bit to get rounding correct.
     let align = a_exponent.wrapping_sub(b_exponent).cast();
-    if align != Int::ZERO {
+    if align != MinInt::ZERO {
         if align < bits {
             let sticky =
-                F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
+                F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
             b_significand = (b_significand >> align.cast()) | sticky;
         } else {
             b_significand = one; // sticky; b is known to be non-zero.
@@ -125,8 +125,8 @@ where
     if subtraction {
         a_significand = a_significand.wrapping_sub(b_significand);
         // If a == -b, return +zero.
-        if a_significand == Int::ZERO {
-            return F::from_repr(Int::ZERO);
+        if a_significand == MinInt::ZERO {
+            return F::from_repr(MinInt::ZERO);
         }
 
         // If partial cancellation occured, we need to left-shift the result
@@ -143,8 +143,8 @@ where
 
         // If the addition carried up, we need to right-shift the result and
         // adjust the exponent:
-        if a_significand & implicit_bit << 4 != Int::ZERO {
-            let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
+        if a_significand & implicit_bit << 4 != MinInt::ZERO {
+            let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
             a_significand = a_significand >> 1 | sticky;
             a_exponent += 1;
         }
@@ -160,7 +160,7 @@ where
         // need to shift the significand.
         let shift = (1 - a_exponent).cast();
        let sticky =
-            F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
+            F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
         a_significand = a_significand >> shift.cast() | sticky;
         a_exponent = 0;
     }
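
The hunks above keep the existing "sticky bit" trick when aligning significands. As a hedged, self-contained illustration (not code from this commit), the idea for a concrete `u64` significand looks roughly like this; the function name and the `1..64` shift range are assumptions that mirror the `align != ZERO` and `align < bits` checks in the diff.

```rust
/// OR any bits lost to the right shift into the lowest bit, so later rounding
/// can still tell that the shifted value was inexact.
fn align_with_sticky(sig: u64, shift: u32) -> u64 {
    debug_assert!((1..64).contains(&shift));
    // Were any of the bits shifted out non-zero?
    let sticky = ((sig << (64 - shift)) != 0) as u64;
    (sig >> shift) | sticky
}
```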

Diff for: src/float/cmp.rs (+1 -1)

@@ -1,7 +1,7 @@
 #![allow(unreachable_code)]
 
 use crate::float::Float;
-use crate::int::Int;
+use crate::int::MinInt;
 
 #[derive(Clone, Copy)]
 enum Result {

Diff for: src/float/div.rs (+46 -33)

@@ -3,7 +3,9 @@
 #![allow(clippy::needless_return)]
 
 use crate::float::Float;
-use crate::int::{CastInto, DInt, HInt, Int};
+use crate::int::{CastInto, DInt, HInt, Int, MinInt};
+
+use super::HalfRep;
 
 fn div32<F: Float>(a: F, b: F) -> F
 where
@@ -454,15 +456,20 @@ where
 
 fn div64<F: Float>(a: F, b: F) -> F
 where
-    u32: CastInto<F::Int>,
     F::Int: CastInto<u32>,
-    i32: CastInto<F::Int>,
     F::Int: CastInto<i32>,
-    u64: CastInto<F::Int>,
+    F::Int: CastInto<HalfRep<F>>,
+    F::Int: From<HalfRep<F>>,
+    F::Int: From<u8>,
     F::Int: CastInto<u64>,
-    i64: CastInto<F::Int>,
     F::Int: CastInto<i64>,
-    F::Int: HInt,
+    F::Int: HInt + DInt,
+    u16: CastInto<F::Int>,
+    i32: CastInto<F::Int>,
+    i64: CastInto<F::Int>,
+    u32: CastInto<F::Int>,
+    u64: CastInto<F::Int>,
+    u64: CastInto<HalfRep<F>>,
 {
     const NUMBER_OF_HALF_ITERATIONS: usize = 3;
     const NUMBER_OF_FULL_ITERATIONS: usize = 1;
@@ -471,7 +478,7 @@ where
     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
     let hw = F::BITS / 2;
-    let lo_mask = u64::MAX >> hw;
+    let lo_mask = F::Int::MAX >> hw;
 
     let significand_bits = F::SIGNIFICAND_BITS;
     let max_exponent = F::EXPONENT_MAX;
@@ -616,21 +623,23 @@ where
 
     let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
         // Starting with (n-1) half-width iterations
-        let b_uq1_hw: u32 =
-            (CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
+        let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
+            CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
+        );
 
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
         // with W0 being either 16 or 32 and W0 <= HW.
         // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
         // b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
 
         // HW is at least 32. Shifting into the highest bits if needed.
-        let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
+        let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));
 
         // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
         // so x0 fits to UQ0.HW without wrapping.
-        let x_uq0_hw: u32 = {
-            let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
+        let x_uq0_hw: HalfRep<F> = {
+            let mut x_uq0_hw: HalfRep<F> =
+                c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
             // dbg!(x_uq0_hw);
             // An e_0 error is comprised of errors due to
             // * x0 being an inherently imprecise first approximation of 1/b_hw
@@ -661,8 +670,9 @@ where
             // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
             // expected to be strictly positive because b_UQ1_hw has its highest bit set
             // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-            let corr_uq1_hw: u32 =
-                0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
+            let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
+                ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
+            ));
             // dbg!(corr_uq1_hw);
 
             // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -677,7 +687,9 @@ where
             // The fact corr_UQ1_hw was virtually round up (due to result of
             // multiplication being **first** truncated, then negated - to improve
             // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
-            x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
+            x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
+                >> (hw - 1))
+                .cast();
             // dbg!(x_uq0_hw);
             // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
             // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
@@ -707,7 +719,7 @@ where
             // be not below that value (see g(x) above), so it is safe to decrement just
             // once after the final iteration. On the other hand, an effective value of
             // divisor changes after this point (from b_hw to b), so adjust here.
-            x_uq0_hw.wrapping_sub(1_u32)
+            x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
         };
 
         // Error estimations for full-precision iterations are calculated just
@@ -717,7 +729,7 @@ where
        // Simulating operations on a twice_rep_t to perform a single final full-width
        // iteration. Using ad-hoc multiplication implementations to take advantage
        // of particular structure of operands.
-        let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
+        let blo: F::Int = b_uq1 & lo_mask;
        // x_UQ0 = x_UQ0_hw * 2^HW - 1
        // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
        //
@@ -726,19 +738,20 @@ where
        //    + [ x_UQ0_hw * blo ]
        //    - [     b_UQ1      ]
        // = [ result ][.... discarded ...]
-        let corr_uq1 = negate_u64(
-            (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
-        ); // account for *possible* carry
-        let lo_corr = corr_uq1 & lo_mask;
-        let hi_corr = corr_uq1 >> hw;
+        let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+            + ((F::Int::from(x_uq0_hw) * blo) >> hw))
+            .wrapping_sub(one)
+            .wrapping_neg(); // account for *possible* carry
+        let lo_corr: F::Int = corr_uq1 & lo_mask;
+        let hi_corr: F::Int = corr_uq1 >> hw;
        // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
-        let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
-            .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
-            .wrapping_sub(2))
-        .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
-        // 1 to account for possible carry
-        // Just like the case of half-width iterations but with possibility
-        // of overflowing by one extra Ulp of x_UQ0.
+        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
+            .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
+            .wrapping_sub(F::Int::from(2u8));
+        // 1 to account for the highest bit of corr_UQ1 can be 1
+        // 1 to account for possible carry
+        // Just like the case of half-width iterations but with possibility
+        // of overflowing by one extra Ulp of x_UQ0.
        x_uq0 -= one;
        // ... and then traditional fixup by 2 should work
 
@@ -755,8 +768,8 @@ where
        x_uq0
    } else {
        // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
-        let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
-        let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
+        let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
+        let x_uq0: F::Int = c.wrapping_sub(b_uq1);
        // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
        x_uq0
    };
@@ -806,7 +819,7 @@ where
    // Now 1/b - (2*P) * 2^-W < x < 1/b
    // FIXME Is x_UQ0 still >= 0.5?
 
-    let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
+    let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
    // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
 
    // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
@@ -868,7 +881,7 @@ where
    // r = a - b * q
    let abs_result = if written_exponent > 0 {
        let mut ret = quotient & significand_mask;
-        ret |= ((written_exponent as u64) << significand_bits).cast();
+        ret |= written_exponent.cast() << significand_bits;
        residual <<= 1;
        ret
    } else {
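
The half-width hunks above generalize what used to be `u32`-specific Newton-Raphson reciprocal refinement over `HalfRep<F>`. As a hedged, concrete model of what the removed 32-bit code computed (with `hw` fixed at 32; this function is illustrative only and not part of the commit):

```rust
/// One fixed-point reciprocal refinement step: `x` approximates 1/b as UQ0.32,
/// `b` is UQ1.31 with its top bit set.
fn reciprocal_step(x: u32, b: u32) -> u32 {
    // corr = -(x*b >> 32), which plays the role of (2 - x*b) in UQ1.31.
    let corr = 0u32.wrapping_sub(((x as u64 * b as u64) >> 32) as u32);
    // x' = x * corr, rescaled from UQ0.32 * UQ1.31 back to UQ0.32.
    ((x as u64 * corr as u64) >> 31) as u32
}
```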

Diff for: src/float/extend.rs (+1 -1)

@@ -1,5 +1,5 @@
 use crate::float::Float;
-use crate::int::{CastInto, Int};
+use crate::int::{CastInto, Int, MinInt};
 
 /// Generic conversion from a narrower to a wider IEEE-754 floating-point type
 fn extend<F: Float, R: Float>(a: F) -> R

Diff for: src/float/mod.rs (+5 -2)

@@ -1,6 +1,6 @@
 use core::ops;
 
-use super::int::Int;
+use crate::int::{DInt, Int, MinInt};
 
 pub mod add;
 pub mod cmp;
@@ -12,6 +12,9 @@ pub mod pow;
 pub mod sub;
 pub mod trunc;
 
+/// Wrapper to extract the integer type half of the float's size
+pub(crate) type HalfRep<F> = <<F as Float>::Int as DInt>::H;
+
 public_test_dep! {
 /// Trait for some basic operations on floats
 #[allow(dead_code)]
@@ -60,7 +63,7 @@ pub(crate) trait Float:
     /// A mask for the significand
     const SIGNIFICAND_MASK: Self::Int;
 
-    // The implicit bit of the float format
+    /// The implicit bit of the float format
     const IMPLICIT_BIT: Self::Int;
 
     /// A mask for the exponent
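
The `HalfRep<F>` alias added here is what lets `div64` work with half-width values generically. A hedged, standalone model of the type-level plumbing, using hypothetical trait stubs rather than the crate's real `DInt` and `Float` definitions:

```rust
// Minimal stand-ins, only to show how the alias resolves for an f64-like float.
trait DInt {
    type H; // half-width counterpart, e.g. u32 for u64
}
impl DInt for u64 {
    type H = u32;
}

trait Float {
    type Int;
}
struct F64;
impl Float for F64 {
    type Int = u64;
}

// Same shape as the alias added in the diff above.
type HalfRep<F> = <<F as Float>::Int as DInt>::H;

fn main() {
    let _half: HalfRep<F64> = 0u32; // HalfRep<F64> resolves to u32
}
```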

Diff for: src/float/mul.rs (+1 -1)

@@ -1,5 +1,5 @@
 use crate::float::Float;
-use crate::int::{CastInto, DInt, HInt, Int};
+use crate::int::{CastInto, DInt, HInt, Int, MinInt};
 
 fn mul<F: Float>(a: F, b: F) -> F
 where

Diff for: src/float/trunc.rs (+1 -1)

@@ -1,5 +1,5 @@
 use crate::float::Float;
-use crate::int::{CastInto, Int};
+use crate::int::{CastInto, Int, MinInt};
 
 fn trunc<F: Float, R: Float>(a: F) -> R
 where

Diff for: src/int/addsub.rs (+5 -5)

@@ -1,6 +1,6 @@
-use crate::int::{DInt, Int};
+use crate::int::{DInt, Int, MinInt};
 
-trait UAddSub: DInt {
+trait UAddSub: DInt + Int {
     fn uadd(self, other: Self) -> Self {
         let (lo, carry) = self.lo().overflowing_add(other.lo());
         let hi = self.hi().wrapping_add(other.hi());
@@ -22,7 +22,7 @@ impl UAddSub for u128 {}
 
 trait AddSub: Int
 where
-    <Self as Int>::UnsignedInt: UAddSub,
+    <Self as MinInt>::UnsignedInt: UAddSub,
 {
     fn add(self, other: Self) -> Self {
         Self::from_unsigned(self.unsigned().uadd(other.unsigned()))
@@ -37,7 +37,7 @@ impl AddSub for i128 {}
 
 trait Addo: AddSub
 where
-    <Self as Int>::UnsignedInt: UAddSub,
+    <Self as MinInt>::UnsignedInt: UAddSub,
 {
     fn addo(self, other: Self) -> (Self, bool) {
         let sum = AddSub::add(self, other);
@@ -50,7 +50,7 @@ impl Addo for u128 {}
 
 trait Subo: AddSub
 where
-    <Self as Int>::UnsignedInt: UAddSub,
+    <Self as MinInt>::UnsignedInt: UAddSub,
 {
     fn subo(self, other: Self) -> (Self, bool) {
         let sum = AddSub::sub(self, other);
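
The `UAddSub` trait above assembles wide addition from `lo`/`hi` halves. A hedged, concrete equivalent for `u128` split into `u64` halves (illustrative only, not the crate's implementation):

```rust
/// Wrapping 128-bit addition mirroring `uadd`: add the low halves with carry
/// detection, then fold the carry into the wrapping sum of the high halves.
fn uadd_u128(a: u128, b: u128) -> u128 {
    let (lo, carry) = (a as u64).overflowing_add(b as u64);
    let hi = ((a >> 64) as u64)
        .wrapping_add((b >> 64) as u64)
        .wrapping_add(carry as u64);
    ((hi as u128) << 64) | (lo as u128)
}
```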
