Skip to content

Commit e48d83c

Browse files
authored
flambda-backend: Float32 min/max/rounding intrinsics (#2684)
1 parent 93f2ff8 commit e48d83c

File tree

6 files changed

+224
-7
lines changed

6 files changed

+224
-7
lines changed

otherlibs/stdlib_beta/float32.ml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,16 @@ let[@inline] max (x : t) (y : t) =
246246
else if is_nan y then y
247247
else x
248248

249+
module With_weird_nan_behavior = struct
250+
external min : t -> t -> t
251+
= "caml_sse_float32_min_bytecode" "caml_sse_float32_min"
252+
[@@noalloc] [@@unboxed] [@@builtin]
253+
254+
external max : t -> t -> t
255+
= "caml_sse_float32_max_bytecode" "caml_sse_float32_max"
256+
[@@noalloc] [@@unboxed] [@@builtin]
257+
end
258+
249259
let[@inline] min_max (x : t) (y : t) =
250260
if is_nan x || is_nan y then (nan, nan)
251261
else if y > x || ((not (sign_bit y)) && sign_bit x) then (x, y)
@@ -267,6 +277,24 @@ let[@inline] min_max_num (x : t) (y : t) =
267277
else if y > x || ((not (sign_bit y)) && sign_bit x) then (x, y)
268278
else (y, x)
269279

280+
external iround_half_to_even : t -> int64
281+
= "caml_sse_cast_float32_int64_bytecode" "caml_sse_cast_float32_int64"
282+
[@@noalloc] [@@unboxed] [@@builtin]
283+
284+
external round_intrinsic : (int[@untagged]) -> (t[@unboxed]) -> (t[@unboxed])
285+
= "caml_sse41_float32_round_bytecode" "caml_sse41_float32_round"
286+
[@@noalloc] [@@builtin]
287+
288+
(* On amd64, these constants also imply _MM_FROUND_NO_EXC (suppress exceptions). *)
289+
let round_neg_inf = 0x9
290+
let round_pos_inf = 0xA
291+
let round_zero = 0xB
292+
let round_current_mode = 0xC
293+
let[@inline] round_half_to_even x = round_intrinsic round_current_mode x
294+
let[@inline] round_down x = round_intrinsic round_neg_inf x
295+
let[@inline] round_up x = round_intrinsic round_pos_inf x
296+
let[@inline] round_towards_zero x = round_intrinsic round_zero x
297+
270298
external seeded_hash_param : int -> int -> int -> 'a -> int = "caml_hash_exn"
271299
[@@noalloc]
272300

otherlibs/stdlib_beta/float32.mli

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -374,17 +374,14 @@ external erfc : t -> t = "caml_erfc_float32_bytecode" "erfcf"
374374
external trunc : t -> t = "caml_trunc_float32_bytecode" "truncf"
375375
[@@unboxed] [@@noalloc]
376376
(** [trunc x] rounds [x] to the nearest integer whose absolute value is
377-
less than or equal to [x]. *)
377+
less than or equal to [x]. *)
378378

379379
external round : t -> t = "caml_round_float32_bytecode" "roundf"
380380
[@@unboxed] [@@noalloc]
381381
(** [round x] rounds [x] to the nearest integer with ties (fractional
382-
values of 0.5s) rounded away from zero, regardless of the current
383-
rounding direction. If [x] is an integer, [+0.s], [-0.s], [nan], or
384-
infinite, [x] itself is returned.
385-
386-
On 64-bit mingw-w64, this function may be emulated owing to a bug in the
387-
C runtime library (CRT) on this platform. *)
382+
values of 0.5s) rounded away from zero, regardless of the current
383+
rounding direction. If [x] is an integer, [+0.s], [-0.s], [nan], or
384+
infinite, [x] itself is returned. *)
388385

389386
external ceil : t -> t = "caml_ceil_float32_bytecode" "ceilf"
390387
[@@unboxed] [@@noalloc]
@@ -461,6 +458,24 @@ val max : t -> t -> t
461458
(** [max x y] returns the maximum of [x] and [y]. It returns [nan]
462459
when [x] or [y] is [nan]. Moreover [max (-0.s) (+0.s) = +0.s] *)
463460

461+
module With_weird_nan_behavior : sig
462+
external min : t -> t -> t
463+
= "caml_sse_float32_min_bytecode" "caml_sse_float32_min"
464+
[@@noalloc] [@@unboxed] [@@builtin]
465+
(** [min x y] returns the minimum of [x] and [y].
466+
If either [x] or [y] is [nan], [y] is returned.
467+
If both [x] and [y] equal zero, [y] is returned.
468+
The amd64 flambda-backend compiler translates this call to MINSS. *)
469+
470+
external max : t -> t -> t
471+
= "caml_sse_float32_max_bytecode" "caml_sse_float32_max"
472+
[@@noalloc] [@@unboxed] [@@builtin]
473+
(** [max x y] returns the maximum of [x] and [y].
474+
If either [x] or [y] is [nan], [y] is returned.
475+
If both [x] and [y] equal zero, [y] is returned.
476+
The amd64 flambda-backend compiler translates this call to MAXSS. *)
477+
end
478+
464479
val min_max : t -> t -> t * t
465480
(** [min_max x y] is [(min x y, max x y)], just more efficient. *)
466481

@@ -479,6 +494,34 @@ val min_max_num : t -> t -> t * t
479494
efficient. Note that in particular [min_max_num x nan = (x, x)]
480495
and [min_max_num nan y = (y, y)]. *)
481496

497+
external iround_half_to_even : t -> int64
498+
= "caml_sse_cast_float32_int64_bytecode" "caml_sse_cast_float32_int64"
499+
[@@noalloc] [@@unboxed] [@@builtin]
500+
(** Rounds a [float32] to an [int64] using the current rounding mode. The default
501+
rounding mode is "round half to even", and we expect that no program will
502+
change the rounding mode.
503+
If the argument is NaN or infinite or if the rounded value cannot be
504+
represented, then the result is unspecified.
505+
The amd64 flambda-backend compiler translates this call to CVTSS2SI. *)
506+
507+
val round_half_to_even : t -> t
508+
(** Rounds a [float32] to an integer [float32] using the current rounding
509+
mode. The default rounding mode is "round half to even", and we
510+
expect that no program will change the rounding mode.
511+
The amd64 flambda-backend compiler translates this call to ROUNDSS. *)
512+
513+
val round_down : t -> t
514+
(** Rounds a [float32] down to the next integer [float32] toward negative infinity.
515+
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)
516+
517+
val round_up : t -> t
518+
(** Rounds a [float32] up to the next integer [float32] toward positive infinity.
519+
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)
520+
521+
val round_towards_zero : t -> t
522+
(** Rounds a [float32] to the next integer [float32] toward zero.
523+
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)
524+
482525
val seeded_hash : int -> t -> int
483526
(** A seeded hash function for floats, with the same output value as
484527
{!Hashtbl.seeded_hash}. This function allows this module to be passed as

otherlibs/stdlib_beta/float32_u.ml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,22 @@ let[@inline always] min x y = of_float32 (Float32.min (to_float32 x) (to_float32
184184

185185
let[@inline always] max x y = of_float32 (Float32.max (to_float32 x) (to_float32 y))
186186

187+
module With_weird_nan_behavior = struct
188+
let[@inline always] min x y = of_float32 (Float32.With_weird_nan_behavior.min (to_float32 x) (to_float32 y))
189+
190+
let[@inline always] max x y = of_float32 (Float32.With_weird_nan_behavior.max (to_float32 x) (to_float32 y))
191+
end
192+
187193
let[@inline always] min_num x y = of_float32 (Float32.min_num (to_float32 x) (to_float32 y))
188194

189195
let[@inline always] max_num x y = of_float32 (Float32.max_num (to_float32 x) (to_float32 y))
196+
197+
let iround_half_to_even x = unbox_int64 (Float32.iround_half_to_even (to_float32 x))
198+
199+
let round_half_to_even x = of_float32 (Float32.round_half_to_even (to_float32 x))
200+
201+
let round_down x = of_float32 (Float32.round_down (to_float32 x))
202+
203+
let round_up x = of_float32 (Float32.round_up (to_float32 x))
204+
205+
let round_towards_zero x = of_float32 (Float32.round_towards_zero (to_float32 x))

otherlibs/stdlib_beta/float32_u.mli

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,20 @@ val max : t -> t -> t
364364
(** [max x y] returns the maximum of [x] and [y]. It returns [nan]
365365
when [x] or [y] is [nan]. Moreover [max #-0.s #+0.s = #+0.s] *)
366366

367+
module With_weird_nan_behavior : sig
368+
val min : t -> t -> t
369+
(** [min x y] returns the minimum of [x] and [y].
370+
If either [x] or [y] is [nan], [y] is returned.
371+
If both [x] and [y] equal zero, [y] is returned.
372+
The amd64 flambda-backend compiler translates this call to MINSS. *)
373+
374+
val max : t -> t -> t
375+
(** [max x y] returns the maximum of [x] and [y].
376+
If either [x] or [y] is [nan], [y] is returned.
377+
If both [x] and [y] equal zero, [y] is returned.
378+
The amd64 flambda-backend compiler translates this call to MAXSS. *)
379+
end
380+
367381
val min_num : t -> t -> t
368382
(** [min_num x y] returns the minimum of [x] and [y] treating [nan] as
369383
missing values. If both [x] and [y] are [nan], [nan] is returned.
@@ -374,5 +388,31 @@ val max_num : t -> t -> t
374388
missing values. If both [x] and [y] are [nan] [nan] is returned.
375389
Moreover [max_num #-0.s #+0.s = #+0.s] *)
376390

391+
val iround_half_to_even : t -> int64#
392+
(** Rounds a [float32#] to an [int64#] using the current rounding mode. The default
393+
rounding mode is "round half to even", and we expect that no program will
394+
change the rounding mode.
395+
If the argument is NaN or infinite or if the rounded value cannot be
396+
represented, then the result is unspecified.
397+
The amd64 flambda-backend compiler translates this call to CVTSS2SI. *)
398+
399+
val round_half_to_even : t -> t
400+
(** Rounds a [float32#] to an integer [float32#] using the current rounding
401+
mode. The default rounding mode is "round half to even", and we
402+
expect that no program will change the rounding mode.
403+
The amd64 flambda-backend compiler translates this call to ROUNDSS. *)
404+
405+
val round_down : t -> t
406+
(** Rounds a [float32#] down to the next integer [float32#] toward negative infinity.
407+
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)
408+
409+
val round_up : t -> t
410+
(** Rounds a [float32#] up to the next integer [float32#] toward positive infinity.
411+
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)
412+
413+
val round_towards_zero : t -> t
414+
(** Rounds a [float32#] to the next integer [float32#] toward zero.
415+
The amd64 flambda-backend compiler translates this call to ROUNDSS.*)
416+
377417
(* CR layouts v5: add back hash when we deal with the ad-hoc polymorphic
378418
functions. *)

runtime/float32.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,51 @@ CAMLprim value caml_ldexp_float32_bytecode(value f, value i)
309309
return caml_copy_float32(caml_ldexp_float32(Float32_val(f), Int_val(i)));
310310
}
311311

312+
float caml_sse_float32_min(float x, float y) {
313+
return x < y ? x : y;
314+
}
315+
316+
CAMLprim value caml_sse_float32_min_bytecode(value x, value y) {
317+
return Float32_val(x) < Float32_val(y) ? x : y;
318+
}
319+
320+
float caml_sse_float32_max(float x, float y) {
321+
return x > y ? x : y;
322+
}
323+
324+
CAMLprim value caml_sse_float32_max_bytecode(value x, value y) {
325+
return Float32_val(x) > Float32_val(y) ? x : y;
326+
}
327+
328+
int64_t caml_sse_cast_float32_int64(float f)
329+
{
330+
return llrintf(f);
331+
}
332+
333+
CAMLprim value caml_sse_cast_float32_int64_bytecode(value f)
334+
{
335+
return caml_copy_int64(caml_sse_cast_float32_int64(Float32_val(f)));
336+
}
337+
338+
#define ROUND_NEG_INF 0x9
339+
#define ROUND_POS_INF 0xA
340+
#define ROUND_ZERO 0xB
341+
#define ROUND_CURRENT 0xC
342+
343+
float caml_sse41_float32_round(int mode, float f) {
344+
switch(mode) {
345+
case ROUND_NEG_INF: return floorf(f);
346+
case ROUND_POS_INF: return ceilf(f);
347+
case ROUND_ZERO: return truncf(f);
348+
case ROUND_CURRENT: return rintf(f);
349+
default: caml_fatal_error("Unknown rounding mode.");
350+
}
351+
}
352+
353+
CAMLprim value caml_sse41_float32_round_bytecode(value mode, value f) {
354+
return caml_copy_float32(caml_sse41_float32_round(Int_val(mode), Float32_val(f)));
355+
}
356+
312357
enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan };
313358

314359
value caml_classify_float32(float vf)

runtime4/float32.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,51 @@ CAMLprim value caml_ldexp_float32_bytecode(value f, value i)
309309
return caml_copy_float32(caml_ldexp_float32(Float32_val(f), Int_val(i)));
310310
}
311311

312+
float caml_sse_float32_min(float x, float y) {
313+
return x < y ? x : y;
314+
}
315+
316+
CAMLprim value caml_sse_float32_min_bytecode(value x, value y) {
317+
return Float32_val(x) < Float32_val(y) ? x : y;
318+
}
319+
320+
float caml_sse_float32_max(float x, float y) {
321+
return x > y ? x : y;
322+
}
323+
324+
CAMLprim value caml_sse_float32_max_bytecode(value x, value y) {
325+
return Float32_val(x) > Float32_val(y) ? x : y;
326+
}
327+
328+
int64_t caml_sse_cast_float32_int64(float f)
329+
{
330+
return llrintf(f);
331+
}
332+
333+
CAMLprim value caml_sse_cast_float32_int64_bytecode(value f)
334+
{
335+
return caml_copy_int64(caml_sse_cast_float32_int64(Float32_val(f)));
336+
}
337+
338+
#define ROUND_NEG_INF 0x9
339+
#define ROUND_POS_INF 0xA
340+
#define ROUND_ZERO 0xB
341+
#define ROUND_CURRENT 0xC
342+
343+
float caml_sse41_float32_round(int mode, float f) {
344+
switch(mode) {
345+
case ROUND_NEG_INF: return floorf(f);
346+
case ROUND_POS_INF: return ceilf(f);
347+
case ROUND_ZERO: return truncf(f);
348+
case ROUND_CURRENT: return rintf(f);
349+
default: caml_fatal_error("Unknown rounding mode.");
350+
}
351+
}
352+
353+
CAMLprim value caml_sse41_float32_round_bytecode(value mode, value f) {
354+
return caml_copy_float32(caml_sse41_float32_round(Int_val(mode), Float32_val(f)));
355+
}
356+
312357
enum { FP_normal, FP_subnormal, FP_zero, FP_infinite, FP_nan };
313358

314359
value caml_classify_float32(float vf)

0 commit comments

Comments
 (0)