Skip to content

Commit 39103eb

Browse files
authored
Avoid CMOV with CFeq (#2571)
* CSEL special case for CFeq to avoid a jump for the unordered case * Add a test * Improve comment
1 parent db0f07b commit 39103eb

File tree

3 files changed

+78
-0
lines changed

3 files changed

+78
-0
lines changed

backend/amd64/selection.ml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,20 @@ method! select_operation op args dbg =
346346
Ispecific Izextend32, [arg]
347347
| _ -> super#select_operation op args dbg
348348
end
349+
| Ccsel _ ->
350+
begin match args with
351+
| [cond; ifso; ifnot] ->
352+
let (cond, earg) = self#select_condition cond in
353+
(match cond with
354+
| Ifloattest (w,CFeq) ->
355+
(* CFeq cannot be represented as cmov without a jump.
356+
CFneq emits cmov for "unordered" and "not equal" cases.
357+
Use CFneq and swap the arguments. *)
358+
Icsel (Ifloattest (w, CFneq)), [ earg; ifnot; ifso ]
359+
| _ ->
360+
(Icsel cond, [ earg; ifso; ifnot ]))
361+
| _ -> super#select_operation op args dbg
362+
end
349363
| Cprefetch { is_write; locality; } ->
350364
(* Emit prefetch for read hint when prefetchw is not supported.
351365
Matches the behavior of gcc's __builtin_prefetch *)

tests/intrinsics/dune

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
(rule
2+
(alias runtest)
3+
(enabled_if (= %{context_name} "main"))
4+
(deps select_float.ml)
5+
(action (run %{bin:ocamlopt.opt} %{deps} -c)))

tests/intrinsics/select_float.ml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
(* [AMD64] Special case of [caml_csel_value] with a float comparison
2+
to avoid branches for [equal].
3+
*)
4+
type t = float#
5+
6+
module Float_u = struct
7+
external to_float : float# -> (float[@local_opt]) = "%box_float"
8+
external of_float : (float[@local_opt]) -> float# = "%unbox_float"
9+
10+
let[@inline] to_bits x = (Int64.bits_of_float) (to_float x)
11+
let[@inline] of_bits x = (Int64.float_of_bits) x |> of_float
12+
13+
let[@inline always] div x y = of_float (Float.div (to_float x) (to_float y))
14+
15+
external select_int64
16+
: bool
17+
-> (int64[@unboxed])
18+
-> (int64[@unboxed])
19+
-> (int64[@unboxed])
20+
= "caml_csel_value" "caml_csel_int64_unboxed"
21+
[@@noalloc] [@@no_effects] [@@no_coeffects] [@@builtin]
22+
23+
let[@inline] select b (ifso : t) (ifnot : t) : t =
24+
select_int64 b (to_bits ifso) (to_bits ifnot) |> of_bits
25+
26+
end
27+
28+
external float_notequal : (float[@local_opt]) -> (float[@local_opt]) -> bool = "%notequal"
29+
external float_equal : (float[@local_opt]) -> (float[@local_opt]) -> bool = "%equal"
30+
31+
32+
let[@inline] divide_unless_denom_zero_else
33+
~(numer : float)
34+
~(denom : float)
35+
~(else_ : float)
36+
=
37+
let is_denom_zero = float_equal denom 0. in
38+
let numer = Float_u.of_float numer in
39+
let denom = Float_u.of_float denom in
40+
let else_ = Float_u.of_float else_ in
41+
Float_u.select
42+
is_denom_zero
43+
else_
44+
(Float_u.div numer denom)
45+
|> Float_u.to_float
46+
;;
47+
48+
let is_result_larger_than_2
49+
~(numer : float)
50+
~(denom : float)
51+
~(else_ : float)
52+
=
53+
let result = divide_unless_denom_zero_else
54+
~numer
55+
~denom
56+
~else_
57+
in
58+
Float.compare result 2. > 0
59+
;;

0 commit comments

Comments
 (0)