@@ -37,8 +37,7 @@ namespace fputil {
37
37
// The outputs of the constructors and most functions will be normalized.
38
38
// To simplify and improve the efficiency, many functions will assume that the
39
39
// inputs are normal.
40
- template <size_t Bits> class DyadicFloat {
41
- public:
40
+ template <size_t Bits> struct DyadicFloat {
42
41
using MantissaType = LIBC_NAMESPACE::UInt<Bits>;
43
42
44
43
Sign sign = Sign::POS;
@@ -102,18 +101,116 @@ template <size_t Bits> class DyadicFloat {
102
101
return exponent + (Bits - 1 );
103
102
}
104
103
105
- // Assume that it is already normalized.
106
- // Output is rounded correctly with respect to the current rounding mode.
104
+ template <typename T>
105
+ LIBC_INLINE constexpr cpp::enable_if_t <
106
+ cpp::is_floating_point_v<T> && (FPBits<T>::FRACTION_LEN < Bits), T>
107
+ generic_as () const {
108
+ using FPBits = FPBits<float16>;
109
+ using StorageType = typename FPBits::StorageType;
110
+
111
+ constexpr int EXTRA_FRACTION_LEN = Bits - 1 - FPBits::FRACTION_LEN;
112
+
113
+ if (mantissa == 0 )
114
+ return FPBits::zero (sign).get_val ();
115
+
116
+ int unbiased_exp = get_unbiased_exponent ();
117
+
118
+ if (unbiased_exp + FPBits::EXP_BIAS >= FPBits::MAX_BIASED_EXPONENT) {
119
+ set_errno_if_required (ERANGE);
120
+ raise_except_if_required (FE_OVERFLOW | FE_INEXACT);
121
+
122
+ switch (quick_get_round ()) {
123
+ case FE_TONEAREST:
124
+ return FPBits::inf (sign).get_val ();
125
+ case FE_TOWARDZERO:
126
+ return FPBits::max_normal (sign).get_val ();
127
+ case FE_DOWNWARD:
128
+ if (sign.is_pos ())
129
+ return FPBits::max_normal (Sign::POS).get_val ();
130
+ return FPBits::inf (Sign::NEG).get_val ();
131
+ case FE_UPWARD:
132
+ if (sign.is_neg ())
133
+ return FPBits::max_normal (Sign::NEG).get_val ();
134
+ return FPBits::inf (Sign::POS).get_val ();
135
+ default :
136
+ __builtin_unreachable ();
137
+ }
138
+ }
139
+
140
+ StorageType out_biased_exp = 0 ;
141
+ StorageType out_mantissa = 0 ;
142
+ bool round = false ;
143
+ bool sticky = false ;
144
+ bool underflow = false ;
145
+
146
+ if (unbiased_exp < -FPBits::EXP_BIAS - FPBits::FRACTION_LEN) {
147
+ sticky = true ;
148
+ underflow = true ;
149
+ } else if (unbiased_exp == -FPBits::EXP_BIAS - FPBits::FRACTION_LEN) {
150
+ round = true ;
151
+ MantissaType sticky_mask = (MantissaType (1 ) << (Bits - 1 )) - 1 ;
152
+ sticky = (mantissa & sticky_mask) != 0 ;
153
+ } else {
154
+ int extra_fraction_len = EXTRA_FRACTION_LEN;
155
+
156
+ if (unbiased_exp < 1 - FPBits::EXP_BIAS) {
157
+ underflow = true ;
158
+ extra_fraction_len += 1 - FPBits::EXP_BIAS - unbiased_exp;
159
+ } else {
160
+ out_biased_exp =
161
+ static_cast <StorageType>(unbiased_exp + FPBits::EXP_BIAS);
162
+ }
163
+
164
+ MantissaType round_mask = MantissaType (1 ) << (extra_fraction_len - 1 );
165
+ round = (mantissa & round_mask) != 0 ;
166
+ MantissaType sticky_mask = round_mask - 1 ;
167
+ sticky = (mantissa & sticky_mask) != 0 ;
168
+
169
+ out_mantissa = static_cast <StorageType>(mantissa >> extra_fraction_len);
170
+ }
171
+
172
+ bool lsb = (out_mantissa & 1 ) != 0 ;
173
+
174
+ StorageType result =
175
+ FPBits::create_value (sign, out_biased_exp, out_mantissa).uintval ();
176
+
177
+ switch (quick_get_round ()) {
178
+ case FE_TONEAREST:
179
+ if (round && (lsb || sticky))
180
+ ++result;
181
+ break ;
182
+ case FE_DOWNWARD:
183
+ if (sign.is_neg () && (round || sticky))
184
+ ++result;
185
+ break ;
186
+ case FE_UPWARD:
187
+ if (sign.is_pos () && (round || sticky))
188
+ ++result;
189
+ break ;
190
+ default :
191
+ break ;
192
+ }
193
+
194
+ if (round || sticky) {
195
+ int excepts = FE_INEXACT;
196
+ if (FPBits (result).is_inf ()) {
197
+ set_errno_if_required (ERANGE);
198
+ excepts |= FE_OVERFLOW;
199
+ } else if (underflow) {
200
+ set_errno_if_required (ERANGE);
201
+ excepts |= FE_UNDERFLOW;
202
+ }
203
+ raise_except_if_required (excepts);
204
+ }
205
+
206
+ return FPBits (result).get_val ();
207
+ }
208
+
107
209
template <typename T, bool ShouldSignalExceptions,
108
210
typename = cpp::enable_if_t <cpp::is_floating_point_v<T> &&
109
211
(FPBits<T>::FRACTION_LEN < Bits),
110
212
void >>
111
- LIBC_INLINE constexpr T as () const {
112
- #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
113
- if constexpr (cpp::is_same_v<T, float16>)
114
- return generic_as<T>();
115
- #endif
116
-
213
+ LIBC_INLINE constexpr T fast_as () const {
117
214
if (LIBC_UNLIKELY (mantissa.is_zero ()))
118
215
return FPBits<T>::zero (sign).get_val ();
119
216
@@ -234,6 +331,22 @@ template <size_t Bits> class DyadicFloat {
234
331
return r;
235
332
}
236
333
334
+ // Assume that it is already normalized.
335
+ // Output is rounded correctly with respect to the current rounding mode.
336
+ template <typename T, bool ShouldSignalExceptions,
337
+ typename = cpp::enable_if_t <cpp::is_floating_point_v<T> &&
338
+ (FPBits<T>::FRACTION_LEN < Bits),
339
+ void >>
340
+ LIBC_INLINE constexpr T as () const {
341
+ #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
342
+ if constexpr (cpp::is_same_v<T, float16>) {
343
+ static_assert (ShouldSignalExceptions);
344
+ return generic_as<T>();
345
+ }
346
+ #endif
347
+ return fast_as<T, ShouldSignalExceptions>();
348
+ }
349
+
237
350
template <typename T,
238
351
typename = cpp::enable_if_t <cpp::is_floating_point_v<T> &&
239
352
(FPBits<T>::FRACTION_LEN < Bits),
@@ -259,114 +372,6 @@ template <size_t Bits> class DyadicFloat {
259
372
260
373
return new_mant;
261
374
}
262
-
263
- private:
264
- template <typename OutType>
265
- LIBC_INLINE constexpr cpp::enable_if_t <
266
- cpp::is_floating_point_v<OutType> &&
267
- sizeof (typename FPBits<OutType>::StorageType) <= sizeof (MantissaType),
268
- OutType>
269
- generic_as () const {
270
- using FPBits = FPBits<float16>;
271
- using StorageType = typename FPBits::StorageType;
272
-
273
- constexpr int EXTRA_FRACTION_LEN = Bits - 1 - FPBits::FRACTION_LEN;
274
-
275
- if (mantissa == 0 )
276
- return FPBits::zero (sign).get_val ();
277
-
278
- int unbiased_exp = get_unbiased_exponent ();
279
-
280
- if (unbiased_exp + FPBits::EXP_BIAS >= FPBits::MAX_BIASED_EXPONENT) {
281
- set_errno_if_required (ERANGE);
282
- raise_except_if_required (FE_OVERFLOW | FE_INEXACT);
283
-
284
- switch (quick_get_round ()) {
285
- case FE_TONEAREST:
286
- return FPBits::inf (sign).get_val ();
287
- case FE_TOWARDZERO:
288
- return FPBits::max_normal (sign).get_val ();
289
- case FE_DOWNWARD:
290
- if (sign.is_pos ())
291
- return FPBits::max_normal (Sign::POS).get_val ();
292
- return FPBits::inf (Sign::NEG).get_val ();
293
- case FE_UPWARD:
294
- if (sign.is_neg ())
295
- return FPBits::max_normal (Sign::NEG).get_val ();
296
- return FPBits::inf (Sign::POS).get_val ();
297
- default :
298
- __builtin_unreachable ();
299
- }
300
- }
301
-
302
- StorageType out_biased_exp = 0 ;
303
- StorageType out_mantissa = 0 ;
304
- bool round = false ;
305
- bool sticky = false ;
306
- bool underflow = false ;
307
-
308
- if (unbiased_exp < -FPBits::EXP_BIAS - FPBits::FRACTION_LEN) {
309
- sticky = true ;
310
- underflow = true ;
311
- } else if (unbiased_exp == -FPBits::EXP_BIAS - FPBits::FRACTION_LEN) {
312
- round = true ;
313
- MantissaType sticky_mask = (MantissaType (1 ) << (Bits - 1 )) - 1 ;
314
- sticky = (mantissa & sticky_mask) != 0 ;
315
- } else {
316
- int extra_fraction_len = EXTRA_FRACTION_LEN;
317
-
318
- if (unbiased_exp < 1 - FPBits::EXP_BIAS) {
319
- underflow = true ;
320
- extra_fraction_len += 1 - FPBits::EXP_BIAS - unbiased_exp;
321
- } else {
322
- out_biased_exp =
323
- static_cast <StorageType>(unbiased_exp + FPBits::EXP_BIAS);
324
- }
325
-
326
- MantissaType round_mask = MantissaType (1 ) << (extra_fraction_len - 1 );
327
- round = (mantissa & round_mask) != 0 ;
328
- MantissaType sticky_mask = round_mask - 1 ;
329
- sticky = (mantissa & sticky_mask) != 0 ;
330
-
331
- out_mantissa = static_cast <StorageType>(mantissa >> extra_fraction_len);
332
- }
333
-
334
- bool lsb = (out_mantissa & 1 ) != 0 ;
335
-
336
- StorageType result =
337
- FPBits::create_value (sign, out_biased_exp, out_mantissa).uintval ();
338
-
339
- switch (quick_get_round ()) {
340
- case FE_TONEAREST:
341
- if (round && (lsb || sticky))
342
- ++result;
343
- break ;
344
- case FE_DOWNWARD:
345
- if (sign.is_neg () && (round || sticky))
346
- ++result;
347
- break ;
348
- case FE_UPWARD:
349
- if (sign.is_pos () && (round || sticky))
350
- ++result;
351
- break ;
352
- default :
353
- break ;
354
- }
355
-
356
- if (round || sticky) {
357
- int excepts = FE_INEXACT;
358
- if (FPBits (result).is_inf ()) {
359
- set_errno_if_required (ERANGE);
360
- excepts |= FE_OVERFLOW;
361
- } else if (underflow) {
362
- set_errno_if_required (ERANGE);
363
- excepts |= FE_UNDERFLOW;
364
- }
365
- raise_except_if_required (excepts);
366
- }
367
-
368
- return FPBits (result).get_val ();
369
- }
370
375
};
371
376
372
377
// Quick add - Add 2 dyadic floats with rounding toward 0 and then normalize the
0 commit comments