 #![allow(clippy::needless_return)]
 
 use crate::float::Float;
-use crate::int::{CastInto, DInt, HInt, Int};
+use crate::int::{CastInto, DInt, HInt, Int, MinInt};
+
+use super::HalfRep;
 
 fn div32<F: Float>(a: F, b: F) -> F
 where
@@ -454,15 +456,20 @@ where
 
 fn div64<F: Float>(a: F, b: F) -> F
 where
-    u32: CastInto<F::Int>,
     F::Int: CastInto<u32>,
-    i32: CastInto<F::Int>,
     F::Int: CastInto<i32>,
-    u64: CastInto<F::Int>,
+    F::Int: CastInto<HalfRep<F>>,
+    F::Int: From<HalfRep<F>>,
+    F::Int: From<u8>,
     F::Int: CastInto<u64>,
-    i64: CastInto<F::Int>,
     F::Int: CastInto<i64>,
-    F::Int: HInt,
+    F::Int: HInt + DInt,
+    u16: CastInto<F::Int>,
+    i32: CastInto<F::Int>,
+    i64: CastInto<F::Int>,
+    u32: CastInto<F::Int>,
+    u64: CastInto<F::Int>,
+    u64: CastInto<HalfRep<F>>,
 {
     const NUMBER_OF_HALF_ITERATIONS: usize = 3;
     const NUMBER_OF_FULL_ITERATIONS: usize = 1;
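Note: the new bounds revolve around HalfRep<F>, the unsigned integer half as wide as the float's bit representation (e.g. u32 for f64, u64 for f128); the alias is presumably built from the DInt half-width type, which is why F::Int: HInt + DInt is now required. A minimal standalone sketch of that relationship using only std types (illustrative values, not the crate's code):

fn hi_lo(rep: u64) -> (u32, u32) {
    // For f64 the representation is u64 and the half representation is u32;
    // "HW" in div64 is half the float's bit width, i.e. 32 here.
    let hw = u64::BITS / 2;
    ((rep >> hw) as u32, (rep & (u64::MAX >> hw)) as u32)
}

fn main() {
    let b_uq1: u64 = 0x8000_0000_0000_0000; // 1.0 as a UQ1.63 fixed-point value
    assert_eq!(hi_lo(b_uq1), (0x8000_0000, 0));
}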
@@ -471,7 +478,7 @@ where
     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
     let hw = F::BITS / 2;
-    let lo_mask = u64::MAX >> hw;
+    let lo_mask = F::Int::MAX >> hw;
 
     let significand_bits = F::SIGNIFICAND_BITS;
     let max_exponent = F::EXPONENT_MAX;
@@ -616,21 +623,23 @@ where
 
     let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
         // Starting with (n-1) half-width iterations
-        let b_uq1_hw: u32 =
-            (CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
+        let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
+            CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
+        );
 
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
         // with W0 being either 16 or 32 and W0 <= HW.
         // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
         // b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
 
         // HW is at least 32. Shifting into the highest bits if needed.
-        let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
+        let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));
 
         // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
         // so x0 fits to UQ0.HW without wrapping.
-        let x_uq0_hw: u32 = {
-            let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
+        let x_uq0_hw: HalfRep<F> = {
+            let mut x_uq0_hw: HalfRep<F> =
+                c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
             // dbg!(x_uq0_hw);
             // An e_0 error is comprised of errors due to
             // * x0 being an inherently imprecise first approximation of 1/b_hw
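Note: the magic constant can be checked independently of the crate: 0x7504F333 is (3/4 + 1/sqrt(2) - 1) * 2^32 truncated to an integer, and for a 64-bit half representation (f128) it gets shifted into the top 32 bits. A standalone sanity check (my own sketch, not part of the change):

fn main() {
    // C = 3/4 + 1/sqrt(2) - 1, truncated to 32 fractional bits.
    let c = 0.75_f64 + 1.0 / 2.0_f64.sqrt() - 1.0; // ~0.45710678
    assert_eq!((c * 2f64.powi(32)) as u32, 0x7504F333);

    // For f128 the half width is 64, so the constant moves to the high bits;
    // for f64 (hw = 32) the shift amount is zero and it is unchanged.
    let hw: u32 = 64;
    let c_hw: u64 = 0x7504F333_u64.wrapping_shl(hw.wrapping_sub(32));
    assert_eq!(c_hw, 0x7504F333_0000_0000);
}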
@@ -661,8 +670,9 @@ where
             // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
             // expected to be strictly positive because b_UQ1_hw has its highest bit set
             // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-            let corr_uq1_hw: u32 =
-                0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
+            let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
+                ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
+            ));
             // dbg!(corr_uq1_hw);
 
             // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
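Note: this correction term is one Newton-Raphson step x <- x * (2 - b*x) for 1/b in fixed point: 2.0 in UQ1.(HW-1) is 2^HW, which wraps to zero, so "2 - b*x" is just a wrapping negation of the truncated product. A standalone u32 demo of the same step (illustrative values, not the crate's generic code):

fn main() {
    let hw: u32 = 32;
    let b_uq1_hw: u32 = 0xC000_0000; // b = 1.5 as UQ1.31
    // x0 = C - b/2, wrapped to [0, 1) as UQ0.32 (~0.7071)
    let mut x_uq0_hw: u32 = 0x7504_F333_u32.wrapping_sub(b_uq1_hw);

    for _ in 0..3 {
        // corr = 2 - b*x as UQ1.31: 2.0 == 2^32 wraps to 0, so negate the product.
        let prod_hi = ((x_uq0_hw as u64 * b_uq1_hw as u64) >> hw) as u32;
        let corr_uq1_hw = 0u32.wrapping_sub(prod_hi);
        // x = x * corr, rescaled back to UQ0.32.
        x_uq0_hw = ((x_uq0_hw as u64 * corr_uq1_hw as u64) >> (hw - 1)) as u32;
    }

    let x = x_uq0_hw as f64 / 2f64.powi(32);
    assert!((x - 1.0 / 1.5).abs() < 1e-6); // converged to ~1/b
}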
@@ -677,7 +687,9 @@ where
             // The fact corr_UQ1_hw was virtually round up (due to result of
             // multiplication being **first** truncated, then negated - to improve
             // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
-            x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
+            x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
+                >> (hw - 1))
+            .cast();
             // dbg!(x_uq0_hw);
             // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
             // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
@@ -707,7 +719,7 @@ where
             // be not below that value (see g(x) above), so it is safe to decrement just
             // once after the final iteration. On the other hand, an effective value of
             // divisor changes after this point (from b_hw to b), so adjust here.
-            x_uq0_hw.wrapping_sub(1_u32)
+            x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
         };
 
         // Error estimations for full-precision iterations are calculated just
@@ -717,7 +729,7 @@ where
         // Simulating operations on a twice_rep_t to perform a single final full-width
         // iteration. Using ad-hoc multiplication implementations to take advantage
         // of particular structure of operands.
-        let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
+        let blo: F::Int = b_uq1 & lo_mask;
         // x_UQ0 = x_UQ0_hw * 2^HW - 1
         // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
         //
@@ -726,19 +738,20 @@ where
         //   + [ x_UQ0_hw * blo ]
         //   - [ b_UQ1 ]
         // = [ result ][.... discarded ...]
-        let corr_uq1 = negate_u64(
-            (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
-        ); // account for *possible* carry
-        let lo_corr = corr_uq1 & lo_mask;
-        let hi_corr = corr_uq1 >> hw;
+        let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+            + ((F::Int::from(x_uq0_hw) * blo) >> hw))
+        .wrapping_sub(one)
+        .wrapping_neg(); // account for *possible* carry
+        let lo_corr: F::Int = corr_uq1 & lo_mask;
+        let hi_corr: F::Int = corr_uq1 >> hw;
         // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
-        let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
-            .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
-            .wrapping_sub(2))
-        .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
-                 // 1 to account for possible carry
-                 // Just like the case of half-width iterations but with possibility
-                 // of overflowing by one extra Ulp of x_UQ0.
+        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
+            .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
+            .wrapping_sub(F::Int::from(2u8));
+        // 1 to account for the highest bit of corr_UQ1 can be 1
+        // 1 to account for possible carry
+        // Just like the case of half-width iterations but with possibility
+        // of overflowing by one extra Ulp of x_UQ0.
         x_uq0 -= one;
         // ... and then traditional fixup by 2 should work
 
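Note: only the top W bits of x_UQ0 * b_UQ1 are needed, and since x_UQ0 = x_UQ0_hw * 2^HW - 1 they can be assembled from two half-width products; the piecewise sum can exceed the exact top half by at most one, which the wrapping_sub(one) before the negation pessimistically covers ("account for *possible* carry"). A standalone check with a u64 "rep" and u32 halves (arbitrary values, not the crate's code):

fn main() {
    let (x_hw, b_hw, blo): (u32, u32, u32) = (0xB504_F334, 0xB504_F333, 0x21FE_6D45);
    let x_uq0: u64 = ((x_hw as u64) << 32).wrapping_sub(1); // x_UQ0_hw * 2^HW - 1
    let b_uq1: u64 = ((b_hw as u64) << 32) | blo as u64;    // b_UQ1_hw * 2^HW + blo

    // Exact top 64 bits of the 128-bit product...
    let exact = ((x_uq0 as u128 * b_uq1 as u128) >> 64) as u64;
    // ...versus the piecewise form built from half-width multiplies.
    let pieces = (x_hw as u64) * (b_hw as u64) + ((x_hw as u64 * blo as u64) >> 32);
    assert!(pieces == exact || pieces == exact + 1);
}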
@@ -755,8 +768,8 @@ where
         x_uq0
     } else {
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
-        let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
-        let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
+        let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
+        let x_uq0: F::Int = c.wrapping_sub(b_uq1);
         // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
         x_uq0
     };
@@ -806,7 +819,7 @@ where
     // Now 1/b - (2*P) * 2^-W < x < 1/b
     // FIXME Is x_UQ0 still >= 0.5?
 
-    let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
+    let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
     // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
 
     // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
@@ -868,7 +881,7 @@ where
     // r = a - b * q
     let abs_result = if written_exponent > 0 {
         let mut ret = quotient & significand_mask;
-        ret |= ((written_exponent as u64) << significand_bits).cast();
+        ret |= written_exponent.cast() << significand_bits;
         residual <<= 1;
         ret
     } else {
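Note: the rewritten packing line keeps the same bit layout but casts the exponent into F::Int before shifting, so the intermediate is no longer hard-coded to u64. For f64 the effect is the usual IEEE 754 assembly of the biased exponent above the 52 significand bits; a standalone illustration (my own sketch, not the crate's code):

fn main() {
    let significand_bits = 52u32;
    let written_exponent: u64 = 1023;  // biased exponent of 1.0 in f64
    let quotient_significand: u64 = 0; // 1.0 has an all-zero stored fraction
    let ret = quotient_significand | (written_exponent << significand_bits);
    assert_eq!(f64::from_bits(ret), 1.0);
}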