diff --git a/libc/src/__support/FPUtil/FloatProperties.h b/libc/src/__support/FPUtil/FloatProperties.h index 7f396a649e4f5..3f7dbdc5af342 100644 --- a/libc/src/__support/FPUtil/FloatProperties.h +++ b/libc/src/__support/FPUtil/FloatProperties.h @@ -87,49 +87,57 @@ template struct FPProperties : public internal::FPBaseProperties { private: using UP = internal::FPBaseProperties; - using UP::EXP_BITS; - using UP::SIG_BITS; - using UP::TOTAL_BITS; + // The number of bits to represent sign. For documentation purpose, always 1. + LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1; + using UP::EXP_BITS; // The number of bits for the *exponent* part + using UP::SIG_BITS; // The number of bits for the *significand* part + using UP::TOTAL_BITS; // For convenience, the sum of `SIG_BITS`, `EXP_BITS`, + // and `SIGN_BITS`. + static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS); public: + // An unsigned integer that is wide enough to contain all of the floating + // point bits. using UIntType = typename UP::UIntType; -private: - LIBC_INLINE_VAR static constexpr int STORAGE_BITS = + // The number of bits in UIntType. + LIBC_INLINE_VAR static constexpr int UINTTYPE_BITS = sizeof(UIntType) * CHAR_BIT; - static_assert(STORAGE_BITS >= TOTAL_BITS); - - // The number of bits to represent sign. - // For documentation purpose, always 1. - LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1; - static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS); + static_assert(UINTTYPE_BITS >= TOTAL_BITS); +private: // The exponent bias. Always positive. LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS = (1U << (EXP_BITS - 1U)) - 1U; static_assert(EXP_BIAS > 0); - // Shifts + // The shift amount to get the *significand* part to the least significant + // bit. Always `0` but kept for consistency. LIBC_INLINE_VAR static constexpr int SIG_MASK_SHIFT = 0; + // The shift amount to get the *exponent* part to the least significant bit. LIBC_INLINE_VAR static constexpr int EXP_MASK_SHIFT = SIG_BITS; + // The shift amount to get the *sign* part to the least significant bit. LIBC_INLINE_VAR static constexpr int SIGN_MASK_SHIFT = SIG_BITS + EXP_BITS; - // Masks + // The bit pattern that keeps only the *significand* part. LIBC_INLINE_VAR static constexpr UIntType SIG_MASK = mask_trailing_ones() << SIG_MASK_SHIFT; + // The bit pattern that keeps only the *exponent* part. LIBC_INLINE_VAR static constexpr UIntType EXP_MASK = mask_trailing_ones() << EXP_MASK_SHIFT; public: + // The bit pattern that keeps only the *sign* part. LIBC_INLINE_VAR static constexpr UIntType SIGN_MASK = mask_trailing_ones() << SIGN_MASK_SHIFT; - -private: + // The bit pattern that keeps only the *sign + exponent + significand* part. LIBC_INLINE_VAR static constexpr UIntType FP_MASK = mask_trailing_ones(); + static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint"); static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover"); +private: LIBC_INLINE static constexpr UIntType bit_at(int position) { return UIntType(1) << position; } diff --git a/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h b/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h index 60953313a695c..f1ef928f23081 100644 --- a/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h +++ b/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h @@ -26,18 +26,6 @@ namespace LIBC_NAMESPACE { namespace fputil { -template struct Padding; - -// i386 padding. -template <> struct Padding<4> { - static constexpr unsigned VALUE = 16; -}; - -// x86_64 padding. -template <> struct Padding<8> { - static constexpr unsigned VALUE = 48; -}; - template <> struct FPBits { using UIntType = UInt128; @@ -129,11 +117,7 @@ template <> struct FPBits { LIBC_INLINE constexpr UIntType uintval() { // We zero the padding bits as they can contain garbage. - constexpr UIntType MASK = - (UIntType(1) << (sizeof(long double) * 8 - - Padding::VALUE)) - - 1; - return bits & MASK; + return bits & FloatProp::FP_MASK; } LIBC_INLINE constexpr long double get_val() const { diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 34c0c0ceef286..be105830a91ac 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE { using BlockInt = uint32_t; constexpr uint32_t BLOCK_SIZE = 9; -using MantissaInt = fputil::FPBits::UIntType; +using FloatProp = fputil::FloatProperties; // Larger numbers prefer a slightly larger constant than is used for the smaller // numbers. @@ -382,11 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt &val) { (1000000000 * shifted)); } -LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa, +LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::UIntType mantissa, const cpp::UInt &large, const int32_t shift_amount) { - constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8; - cpp::UInt val(large); + cpp::UInt val(large); val = (val * mantissa) >> shift_amount; return static_cast( val.div_uint32_times_pow_2(1000000000, 0).value()); @@ -415,7 +414,7 @@ class FloatToString { fputil::FPBits float_bits; bool is_negative; int exponent; - MantissaInt mantissa; + FloatProp::UIntType mantissa; static constexpr int MANT_WIDTH = fputil::MantissaWidth::VALUE; static constexpr int EXP_BIAS = fputil::FPBits::EXPONENT_BIAS; diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index 79d35682d0b71..2a6f15c018f1e 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat init_num, UIntType mantissa = init_num.mantissa; int32_t exp10 = init_num.exponent; - constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; - if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a // double, so we skip straight to the fallback. return cpp::nullopt; @@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat init_num, uint32_t clz = cpp::countl_zero(mantissa); mantissa <<= clz; - int32_t exp2 = - exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz; + int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS + + FloatProp::EXPONENT_BIAS - clz; // Multiplication const uint64_t *power_of_ten = @@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat init_num, // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly, // it's 6 bits for floats in this case. const uint64_t halfway_constant = - (uint64_t(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1; + (uint64_t(1) << (FloatProp::UINTTYPE_BITS - + (FloatProp::MANTISSA_WIDTH + 3))) - + 1; if ((high64(first_approx) & halfway_constant) == halfway_constant && low64(first_approx) + mantissa < mantissa) { UInt128 low_bits = @@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat init_num, } // Shifting to 54 bits for doubles and 25 bits for floats - UIntType msb = - static_cast(high64(final_approx) >> (BITS_IN_MANTISSA - 1)); + UIntType msb = static_cast(high64(final_approx) >> + (FloatProp::UINTTYPE_BITS - 1)); UIntType final_mantissa = static_cast( high64(final_approx) >> - (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))); + (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3))); exp2 -= static_cast(1 ^ msb); // same as !msb if (round == RoundDirection::Nearest) { @@ -190,8 +190,6 @@ eisel_lemire(ExpandedFloat init_num, UIntType mantissa = init_num.mantissa; int32_t exp10 = init_num.exponent; - constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; - // Exp10 Range // This doesn't reach very far into the range for long doubles, since it's // sized for doubles and their 11 exponent bits, and not for long doubles and @@ -211,8 +209,8 @@ eisel_lemire(ExpandedFloat init_num, uint32_t clz = cpp::countl_zero(mantissa); mantissa <<= clz; - int32_t exp2 = - exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz; + int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS + + FloatProp::EXPONENT_BIAS - clz; // Multiplication const uint64_t *power_of_ten = @@ -249,7 +247,9 @@ eisel_lemire(ExpandedFloat init_num, // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, // it's 12 bits for 128 bit floats in this case. constexpr UInt128 HALFWAY_CONSTANT = - (UInt128(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1; + (UInt128(1) << (FloatProp::UINTTYPE_BITS - + (FloatProp::MANTISSA_WIDTH + 3))) - + 1; if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && final_approx_lower + mantissa < mantissa) { @@ -257,11 +257,11 @@ eisel_lemire(ExpandedFloat init_num, } // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats - uint32_t msb = - static_cast(final_approx_upper >> (BITS_IN_MANTISSA - 1)); + uint32_t msb = static_cast(final_approx_upper >> + (FloatProp::UINTTYPE_BITS - 1)); UIntType final_mantissa = final_approx_upper >> - (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3)); + (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3)); exp2 -= static_cast(1 ^ msb); // same as !msb if (round == RoundDirection::Nearest) { @@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound() { return 309; } // other out, and subnormal numbers allow for the result to be at the very low // end of the final mantissa. template constexpr int32_t get_lower_bound() { - return -((fputil::FloatProperties::EXPONENT_BIAS + - static_cast(fputil::FloatProperties::MANTISSA_WIDTH + - (sizeof(T) * 8))) / + using FloatProp = typename fputil::FloatProperties; + return -((FloatProp::EXPONENT_BIAS + + static_cast(FloatProp::MANTISSA_WIDTH + + FloatProp::UINTTYPE_BITS)) / 3); } @@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn binary_exp_to_float(ExpandedFloat init_num, // This is the number of leading zeroes a properly normalized float of type T // should have. - constexpr int32_t NUMBITS = sizeof(UIntType) * 8; constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1; // Normalization step 1: Bring the leading bit to the highest bit of UIntType. @@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn binary_exp_to_float(ExpandedFloat init_num, // Keep exp2 representing the exponent of the lowest bit of UIntType. exp2 -= amount_to_shift_left; - // biasedExponent represents the biased exponent of the most significant bit. - int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1; + // biased_exponent represents the biased exponent of the most significant bit. + int32_t biased_exponent = + exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1; // Handle numbers that're too large and get squashed to inf if (biased_exponent >= INF_EXP) { @@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn binary_exp_to_float(ExpandedFloat init_num, return output; } - uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1; + uint32_t amount_to_shift_right = + FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1; // Handle subnormals. if (biased_exponent <= 0) { amount_to_shift_right += 1 - biased_exponent; biased_exponent = 0; - if (amount_to_shift_right > NUMBITS) { + if (amount_to_shift_right > FloatProp::UINTTYPE_BITS) { // Return 0 if the exponent is too small. output.num = {0, 0}; output.error = ERANGE; @@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn binary_exp_to_float(ExpandedFloat init_num, bool round_bit = static_cast(mantissa & round_bit_mask); bool sticky_bit = static_cast(mantissa & sticky_mask) || truncated; - if (amount_to_shift_right < NUMBITS) { + if (amount_to_shift_right < FloatProp::UINTTYPE_BITS) { // Shift the mantissa and clear the implicit bit. mantissa >>= amount_to_shift_right; mantissa &= FloatProp::MANTISSA_MASK;