Skip to content

[libc][NFC] Reuse FloatProperties constant instead of creating new ones #75187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions libc/src/__support/FPUtil/FloatProperties.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,49 +87,57 @@ template <FPType fp_type>
struct FPProperties : public internal::FPBaseProperties<fp_type> {
private:
using UP = internal::FPBaseProperties<fp_type>;
using UP::EXP_BITS;
using UP::SIG_BITS;
using UP::TOTAL_BITS;
// The number of bits to represent sign. For documentation purpose, always 1.
LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
using UP::EXP_BITS; // The number of bits for the *exponent* part
using UP::SIG_BITS; // The number of bits for the *significand* part
using UP::TOTAL_BITS; // For convenience, the sum of `SIG_BITS`, `EXP_BITS`,
// and `SIGN_BITS`.
static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);

public:
// An unsigned integer that is wide enough to contain all of the floating
// point bits.
using UIntType = typename UP::UIntType;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use this as an opportunity to document this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


private:
LIBC_INLINE_VAR static constexpr int STORAGE_BITS =
// The number of bits in UIntType.
LIBC_INLINE_VAR static constexpr int UINTTYPE_BITS =
sizeof(UIntType) * CHAR_BIT;
static_assert(STORAGE_BITS >= TOTAL_BITS);

// The number of bits to represent sign.
// For documentation purpose, always 1.
LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
static_assert(UINTTYPE_BITS >= TOTAL_BITS);

private:
// The exponent bias. Always positive.
LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
(1U << (EXP_BITS - 1U)) - 1U;
static_assert(EXP_BIAS > 0);

// Shifts
// The shift amount to get the *significand* part to the least significant
// bit. Always `0` but kept for consistency.
LIBC_INLINE_VAR static constexpr int SIG_MASK_SHIFT = 0;
// The shift amount to get the *exponent* part to the least significant bit.
LIBC_INLINE_VAR static constexpr int EXP_MASK_SHIFT = SIG_BITS;
// The shift amount to get the *sign* part to the least significant bit.
LIBC_INLINE_VAR static constexpr int SIGN_MASK_SHIFT = SIG_BITS + EXP_BITS;

// Masks
// The bit pattern that keeps only the *significand* part.
LIBC_INLINE_VAR static constexpr UIntType SIG_MASK =
mask_trailing_ones<UIntType, SIG_BITS>() << SIG_MASK_SHIFT;
// The bit pattern that keeps only the *exponent* part.
LIBC_INLINE_VAR static constexpr UIntType EXP_MASK =
mask_trailing_ones<UIntType, EXP_BITS>() << EXP_MASK_SHIFT;

public:
// The bit pattern that keeps only the *sign* part.
LIBC_INLINE_VAR static constexpr UIntType SIGN_MASK =
mask_trailing_ones<UIntType, SIGN_BITS>() << SIGN_MASK_SHIFT;

private:
// The bit pattern that keeps only the *sign + exponent + significand* part.
LIBC_INLINE_VAR static constexpr UIntType FP_MASK =
mask_trailing_ones<UIntType, TOTAL_BITS>();

static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint");
static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover");

private:
LIBC_INLINE static constexpr UIntType bit_at(int position) {
return UIntType(1) << position;
}
Expand Down
18 changes: 1 addition & 17 deletions libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,6 @@
namespace LIBC_NAMESPACE {
namespace fputil {

// Number of bits to exclude from the storage of a `long double`, keyed by
// pointer width. `uintval()` below instantiates this as
// `Padding<sizeof(uintptr_t)>` and subtracts `VALUE` from
// `sizeof(long double) * 8` to size the mask that zeroes the padding bits.
// Only the widths specialized below are defined; any other width fails to
// compile because the primary template is declared but never defined.
template <unsigned Width> struct Padding;

// i386 padding (selected when sizeof(uintptr_t) == 4): 16 bits.
template <> struct Padding<4> {
static constexpr unsigned VALUE = 16;
};

// x86_64 padding (selected when sizeof(uintptr_t) == 8): 48 bits.
template <> struct Padding<8> {
static constexpr unsigned VALUE = 48;
};

template <> struct FPBits<long double> {
using UIntType = UInt128;

Expand Down Expand Up @@ -129,11 +117,7 @@ template <> struct FPBits<long double> {

LIBC_INLINE constexpr UIntType uintval() {
// We zero the padding bits as they can contain garbage.
constexpr UIntType MASK =
(UIntType(1) << (sizeof(long double) * 8 -
Padding<sizeof(uintptr_t)>::VALUE)) -
1;
return bits & MASK;
return bits & FloatProp::FP_MASK;
}

LIBC_INLINE constexpr long double get_val() const {
Expand Down
9 changes: 4 additions & 5 deletions libc/src/__support/float_to_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE {
using BlockInt = uint32_t;
constexpr uint32_t BLOCK_SIZE = 9;

using MantissaInt = fputil::FPBits<long double>::UIntType;
using FloatProp = fputil::FloatProperties<long double>;

// Larger numbers prefer a slightly larger constant than is used for the smaller
// numbers.
Expand Down Expand Up @@ -382,11 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
(1000000000 * shifted));
}

LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::UIntType mantissa,
const cpp::UInt<MID_INT_SIZE> &large,
const int32_t shift_amount) {
constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8;
cpp::UInt<MID_INT_SIZE + MANT_INT_SIZE> val(large);
cpp::UInt<MID_INT_SIZE + FloatProp::UINTTYPE_BITS> val(large);
val = (val * mantissa) >> shift_amount;
return static_cast<uint32_t>(
val.div_uint32_times_pow_2(1000000000, 0).value());
Expand Down Expand Up @@ -415,7 +414,7 @@ class FloatToString {
fputil::FPBits<T> float_bits;
bool is_negative;
int exponent;
MantissaInt mantissa;
FloatProp::UIntType mantissa;

static constexpr int MANT_WIDTH = fputil::MantissaWidth<T>::VALUE;
static constexpr int EXP_BIAS = fputil::FPBits<T>::EXPONENT_BIAS;
Expand Down
52 changes: 27 additions & 25 deletions libc/src/__support/str_to_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat<T> init_num,
UIntType mantissa = init_num.mantissa;
int32_t exp10 = init_num.exponent;

constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow this was misleading. Much better now.


if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a
// double, so we skip straight to the fallback.
return cpp::nullopt;
Expand All @@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat<T> init_num,
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
mantissa <<= clz;

int32_t exp2 =
exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
FloatProp::EXPONENT_BIAS - clz;

// Multiplication
const uint64_t *power_of_ten =
Expand All @@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat<T> init_num,
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
// it's 6 bits for floats in this case.
const uint64_t halfway_constant =
(uint64_t(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
(uint64_t(1) << (FloatProp::UINTTYPE_BITS -
(FloatProp::MANTISSA_WIDTH + 3))) -
1;
if ((high64(first_approx) & halfway_constant) == halfway_constant &&
low64(first_approx) + mantissa < mantissa) {
UInt128 low_bits =
Expand All @@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
}

// Shifting to 54 bits for doubles and 25 bits for floats
UIntType msb =
static_cast<UIntType>(high64(final_approx) >> (BITS_IN_MANTISSA - 1));
UIntType msb = static_cast<UIntType>(high64(final_approx) >>
(FloatProp::UINTTYPE_BITS - 1));
UIntType final_mantissa = static_cast<UIntType>(
high64(final_approx) >>
(msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3)));
(msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3)));
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb

if (round == RoundDirection::Nearest) {
Expand Down Expand Up @@ -190,8 +190,6 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
UIntType mantissa = init_num.mantissa;
int32_t exp10 = init_num.exponent;

constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;

// Exp10 Range
// This doesn't reach very far into the range for long doubles, since it's
// sized for doubles and their 11 exponent bits, and not for long doubles and
Expand All @@ -211,8 +209,8 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
mantissa <<= clz;

int32_t exp2 =
exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
FloatProp::EXPONENT_BIAS - clz;

// Multiplication
const uint64_t *power_of_ten =
Expand Down Expand Up @@ -249,19 +247,21 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
// it's 12 bits for 128 bit floats in this case.
constexpr UInt128 HALFWAY_CONSTANT =
(UInt128(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
(UInt128(1) << (FloatProp::UINTTYPE_BITS -
(FloatProp::MANTISSA_WIDTH + 3))) -
1;

if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
final_approx_lower + mantissa < mantissa) {
return cpp::nullopt;
}

// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
uint32_t msb =
static_cast<uint32_t>(final_approx_upper >> (BITS_IN_MANTISSA - 1));
uint32_t msb = static_cast<uint32_t>(final_approx_upper >>
(FloatProp::UINTTYPE_BITS - 1));
UIntType final_mantissa =
final_approx_upper >>
(msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3));
(msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3));
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb

if (round == RoundDirection::Nearest) {
Expand Down Expand Up @@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
// other out, and subnormal numbers allow for the result to be at the very low
// end of the final mantissa.
template <typename T> constexpr int32_t get_lower_bound() {
return -((fputil::FloatProperties<T>::EXPONENT_BIAS +
static_cast<int32_t>(fputil::FloatProperties<T>::MANTISSA_WIDTH +
(sizeof(T) * 8))) /
using FloatProp = typename fputil::FloatProperties<T>;
return -((FloatProp::EXPONENT_BIAS +
static_cast<int32_t>(FloatProp::MANTISSA_WIDTH +
FloatProp::UINTTYPE_BITS)) /
3);
}

Expand Down Expand Up @@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,

// This is the number of leading zeroes a properly normalized float of type T
// should have.
constexpr int32_t NUMBITS = sizeof(UIntType) * 8;
constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1;

// Normalization step 1: Bring the leading bit to the highest bit of UIntType.
Expand All @@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
// Keep exp2 representing the exponent of the lowest bit of UIntType.
exp2 -= amount_to_shift_left;

// biasedExponent represents the biased exponent of the most significant bit.
int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1;
// biased_exponent represents the biased exponent of the most significant bit.
int32_t biased_exponent =
exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1;

// Handle numbers that're too large and get squashed to inf
if (biased_exponent >= INF_EXP) {
Expand All @@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
return output;
}

uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1;
uint32_t amount_to_shift_right =
FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1;

// Handle subnormals.
if (biased_exponent <= 0) {
amount_to_shift_right += 1 - biased_exponent;
biased_exponent = 0;

if (amount_to_shift_right > NUMBITS) {
if (amount_to_shift_right > FloatProp::UINTTYPE_BITS) {
// Return 0 if the exponent is too small.
output.num = {0, 0};
output.error = ERANGE;
Expand All @@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;

if (amount_to_shift_right < NUMBITS) {
if (amount_to_shift_right < FloatProp::UINTTYPE_BITS) {
// Shift the mantissa and clear the implicit bit.
mantissa >>= amount_to_shift_right;
mantissa &= FloatProp::MANTISSA_MASK;
Expand Down