Skip to content

Commit 6aa3b7a

Browse files
authored
Unify float sqrt/bit-cast intrinsics (#2519)
1 parent 69fb82d commit 6aa3b7a

File tree

15 files changed: +115 additions, -135 deletions

backend/amd64/CSE.ml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ method! class_of_operation op =
3232
| Ilea _ | Isextend32 | Izextend32 -> Op_pure
3333
| Istore_int(_, _, is_asg) -> Op_store is_asg
3434
| Ioffset_loc(_, _) -> Op_store true
35-
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable
35+
| Ifloatarithmem _ -> Op_load Mutable
3636
| Ibswap _ -> super#class_of_operation op
3737
| Irdtsc | Irdpmc
3838
| Ilfence | Isfence | Imfence -> Op_other
@@ -75,7 +75,7 @@ class cfg_cse = object
7575
| Ilea _ | Isextend32 | Izextend32 -> Op_pure
7676
| Istore_int(_, _, is_asg) -> Op_store is_asg
7777
| Ioffset_loc(_, _) -> Op_store true
78-
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable
78+
| Ifloatarithmem _ -> Op_load Mutable
7979
| Ibswap _ -> super#class_of_operation op
8080
| Irdtsc | Irdpmc
8181
| Ilfence | Isfence | Imfence -> Op_other

backend/amd64/arch.ml

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,6 @@ type specific_operation =
149149
(* Add a constant to a location *)
150150
| Ifloatarithmem of float_width * float_operation * addressing_mode
151151
(* Float arith operation with memory *)
152-
| Ifloatsqrtf of float_width * addressing_mode
153-
(* Float square root from memory *)
154152
| Ibswap of { bitwidth: bswap_bitwidth; } (* endianness conversion *)
155153
| Isextend32 (* 32 to 64 bit conversion with sign
156154
extension *)
@@ -251,12 +249,6 @@ let print_specific_operation printreg op ppf arg =
251249
(if is_assign then "(assign)" else "(init)")
252250
| Ioffset_loc(n, addr) ->
253251
fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
254-
| Ifloatsqrtf (Float64, addr) ->
255-
fprintf ppf "sqrtf float64[%a]"
256-
(print_addressing printreg addr) [|arg.(0)|]
257-
| Ifloatsqrtf (Float32, addr) ->
258-
fprintf ppf "sqrtf float32[%a]"
259-
(print_addressing printreg addr) [|arg.(0)|]
260252
| Ifloatarithmem(width, op, addr) ->
261253
let op_name = match width, op with
262254
| Float64, Ifloatadd -> "+f"
@@ -305,7 +297,7 @@ let win64 =
305297

306298
let operation_is_pure = function
307299
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
308-
| Ifloatarithmem _ | Ifloatsqrtf _ -> true
300+
| Ifloatarithmem _ -> true
309301
| Irdtsc | Irdpmc | Ipause
310302
| Ilfence | Isfence | Imfence
311303
| Istore_int (_, _, _) | Ioffset_loc (_, _)
@@ -316,15 +308,15 @@ let operation_is_pure = function
316308

317309
let operation_can_raise = function
318310
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
319-
| Ifloatarithmem _ | Ifloatsqrtf _
311+
| Ifloatarithmem _
320312
| Irdtsc | Irdpmc | Ipause | Isimd _
321313
| Ilfence | Isfence | Imfence
322314
| Istore_int (_, _, _) | Ioffset_loc (_, _)
323315
| Iprefetch _ -> false
324316

325317
let operation_allocates = function
326318
| Ilea _ | Ibswap _ | Isextend32 | Izextend32
327-
| Ifloatarithmem _ | Ifloatsqrtf _
319+
| Ifloatarithmem _
328320
| Irdtsc | Irdpmc | Ipause | Isimd _
329321
| Ilfence | Isfence | Imfence
330322
| Istore_int (_, _, _) | Ioffset_loc (_, _)
@@ -393,9 +385,6 @@ let equal_specific_operation left right =
393385
equal_addressing_mode x' y'
394386
| Ibswap { bitwidth = left }, Ibswap { bitwidth = right } ->
395387
Int.equal (int_of_bswap_bitwidth left) (int_of_bswap_bitwidth right)
396-
| Ifloatsqrtf (left_w, left), Ifloatsqrtf (right_w, right) ->
397-
Cmm.equal_float_width left_w right_w &&
398-
equal_addressing_mode left right
399388
| Isextend32, Isextend32 ->
400389
true
401390
| Izextend32, Izextend32 ->
@@ -418,7 +407,7 @@ let equal_specific_operation left right =
418407
&& equal_addressing_mode left_addr right_addr
419408
| Isimd l, Isimd r ->
420409
Simd.equal_operation l r
421-
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ifloatsqrtf _ | Ibswap _ |
410+
| (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
422411
Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
423412
Ipause | Isimd _ | Iprefetch _), _ ->
424413
false

backend/amd64/arch.mli

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ type specific_operation =
7777
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
7878
| Ifloatarithmem of float_width * float_operation * addressing_mode
7979
(* Float arith operation with memory *)
80-
| Ifloatsqrtf of float_width * addressing_mode
81-
(* Float square root from memory *)
8280
| Ibswap of { bitwidth: bswap_bitwidth; } (* endianness conversion *)
8381
| Isextend32 (* 32 to 64 bit conversion with sign
8482
extension *)

backend/amd64/emit.mlp

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -801,33 +801,21 @@ let emit_global_label s =
801801
let move (src : Reg.t) (dst : Reg.t) =
802802
if src.loc <> dst.loc then
803803
begin match src.typ, src.loc, dst.typ, dst.loc with
804-
| (Float | Float32), Reg.Reg _, (Float | Float32), Reg.Reg _
805-
| Vec128, _, Vec128, _ ->
806-
(* Vec128 stack slots are always aligned. *)
804+
| Float, Reg _, Float, Reg _
805+
| Float32, Reg _, Float32, Reg _
806+
| Vec128, _, Vec128, _ (* Vec128 stack slots are always aligned. *) ->
807807
I.movapd (reg src) (reg dst)
808808
| Float, _, Float, _ ->
809809
I.movsd (reg src) (reg dst)
810810
| Float32, _, Float32, _ ->
811811
I.movss (reg src) (reg dst)
812-
| Float, _, Int, _ | Int, _, Float, _ ->
813-
(* CR-soon gyorsh: this case is used by the bits_of_float/float_of_bits intrinsics.
814-
They should instead generate a separate Ispecific and this case should be
815-
removed. *)
816-
I.movq (reg src) (reg dst)
817812
| (Int | Val | Addr), _, (Int | Val | Addr), _ ->
818813
I.mov (reg src) (reg dst)
819-
| Vec128, _, _, _ | _, _, Vec128, _ ->
814+
| (Float | Float32 | Vec128 | Int | Val | Addr), _, _, _ ->
820815
Misc.fatal_errorf
821-
"Illegal move between a vector and non-vector register (%s to %s)\n"
822-
(Reg.name src) (Reg.name dst)
823-
| Float32, _, _, _ | _, _, Float32, _ ->
824-
Misc.fatal_errorf
825-
"Illegal move between a float32 and non-float32 register (%s to %s)\n"
826-
(Reg.name src) (Reg.name dst)
827-
| Float, _, (Val | Addr), _ | (Val | Addr), _, Float, _ ->
828-
Misc.fatal_errorf
829-
"Illegal move between a float and val/addr register (%s to %s)\n"
830-
(Reg.name src) (Reg.name dst)
816+
"Illegal move between registers of differing types (%s:%a to %s:%a)\n"
817+
(Reg.name src) Printcmm.machtype_component src.typ
818+
(Reg.name dst) Printcmm.machtype_component dst.typ
831819
end
832820

833821
let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
@@ -1068,6 +1056,10 @@ let emit_simd_instr op i =
10681056
if arg i 0 <> res i 0 then
10691057
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
10701058
I.sqrtsd (arg i 0) (res i 0)
1059+
| SSE2 Sqrt_scalar_f32 ->
1060+
if arg i 0 <> res i 0 then
1061+
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
1062+
I.sqrtss (arg i 0) (res i 0)
10711063
| SSE2 Sqrt_f64 -> I.sqrtpd (arg i 0) (res i 0)
10721064
| SSE2 Add_i8 -> I.paddb (arg i 1) (res i 0)
10731065
| SSE2 Add_i16 -> I.paddw (arg i 1) (res i 0)
@@ -1128,6 +1120,10 @@ let emit_simd_instr op i =
11281120
| SSE2 I16_to_unsigned_i8 -> I.packuswb (arg i 1) (res i 0)
11291121
| SSE2 I32_to_unsigned_i16 -> I.packusdw (arg i 1) (res i 0)
11301122
| SSE2 Cast_scalar_f64_i64 -> I.cvtsd2si (arg i 0) (res i 0)
1123+
| SSE2 Bit_cast_f64_i64 -> I.movq (arg i 0) (res i 0)
1124+
| SSE2 Bit_cast_i64_f64 -> I.movq (arg i 0) (res i 0)
1125+
| SSE2 Bit_cast_f32_i32 -> I.movd (arg i 0) (res32 i 0)
1126+
| SSE2 Bit_cast_i32_f32 -> I.movd (arg32 i 0) (res i 0)
11311127
| SSE2 SLL_i16 -> I.psllw (arg i 1) (res i 0)
11321128
| SSE2 SLL_i32 -> I.pslld (arg i 1) (res i 0)
11331129
| SSE2 SLL_i64 -> I.psllq (arg i 1) (res i 0)
@@ -1636,12 +1632,6 @@ let emit_instr ~first ~fallthrough i =
16361632
I.bswap (res32 i 0);
16371633
| Lop(Ispecific(Ibswap { bitwidth = Sixtyfour })) ->
16381634
I.bswap (res i 0)
1639-
| Lop(Ispecific(Ifloatsqrtf (Float64, addr))) ->
1640-
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
1641-
I.sqrtsd (addressing addr REAL8 i 0) (res i 0)
1642-
| Lop(Ispecific(Ifloatsqrtf (Float32, _addr))) ->
1643-
(* CR mslater: (float32) Ifloatsqrtf Float32 *)
1644-
Misc.fatal_error "Ifloatsqrtf Float32 should never be generated."
16451635
| Lop(Ispecific(Isextend32)) ->
16461636
I.movsxd (arg32 i 0) (res i 0)
16471637
| Lop(Ispecific(Izextend32)) ->

backend/amd64/proc.ml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ let destroyed_at_oper = function
488488
| Iop(Ispecific(Isextend32 | Izextend32 | Ilea _
489489
| Istore_int (_, _, _) | Ioffset_loc (_, _)
490490
| Ipause | Iprefetch _
491-
| Ifloatarithmem (_, _, _) | Ifloatsqrtf (_, _) | Ibswap _))
491+
| Ifloatarithmem (_, _, _) | Ibswap _))
492492
| Iop(Iintop(Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
493493
| Ipopcnt | Iclz _ | Ictz _ ))
494494
| Iop(Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl
@@ -563,7 +563,7 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
563563
| Begin_region
564564
| End_region
565565
| Specific (Ilea _ | Istore_int _ | Ioffset_loc _
566-
| Ifloatarithmem _ | Ifloatsqrtf _ | Ibswap _
566+
| Ifloatarithmem _ | Ibswap _
567567
| Isextend32 | Izextend32 | Ipause
568568
| Iprefetch _ | Ilfence | Isfence | Imfence)
569569
| Name_for_debugger _ | Dls_get)
@@ -591,7 +591,7 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
591591
| Call {op = Indirect | Direct _; _} ->
592592
all_phys_regs ()
593593
| Specific_can_raise { op = (Ilea _ | Ibswap _ | Isextend32 | Izextend32
594-
| Ifloatarithmem _ | Ifloatsqrtf _ | Irdtsc | Irdpmc | Ipause
594+
| Ifloatarithmem _ | Irdtsc | Irdpmc | Ipause
595595
| Isimd _ | Ilfence | Isfence | Imfence
596596
| Istore_int (_, _, _) | Ioffset_loc (_, _)
597597
| Iprefetch _); _ } ->
@@ -621,7 +621,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
621621
| Call {op = Indirect | Direct _; _} ->
622622
true
623623
| Specific_can_raise { op = (Ilea _ | Ibswap _ | Isextend32 | Izextend32
624-
| Ifloatarithmem _ | Ifloatsqrtf _ | Irdtsc | Irdpmc | Ipause
624+
| Ifloatarithmem _ | Irdtsc | Irdpmc | Ipause
625625
| Isimd _ | Ilfence | Isfence | Imfence
626626
| Istore_int (_, _, _) | Ioffset_loc (_, _)
627627
| Iprefetch _); _ } ->
@@ -701,7 +701,6 @@ let max_register_pressure =
701701
| Irdtsc | Irdpmc | Istore_int (_, _, _)
702702
| Ilfence | Isfence | Imfence
703703
| Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
704-
| Ifloatsqrtf (_, _)
705704
| Ibswap _)
706705
| Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque
707706
| Ibeginregion | Iendregion | Idls_get

backend/amd64/regalloc_stack_operands.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
236236
| Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
237237
| Ipause
238238
| Iprefetch _
239-
| Ibswap _ | Ifloatsqrtf _))
239+
| Ibswap _))
240240
| Reloadretaddr
241241
| Pushtrap _
242242
| Poptrap

backend/amd64/reload.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ method! reload_operation op arg res =
187187
| Iintop_atomic _
188188
| Ispecific (Isextend32 | Izextend32 | Ilea _
189189
| Istore_int (_, _, _)
190-
| Ioffset_loc (_, _) | Ifloatarithmem (_, _, _) | Ifloatsqrtf _
190+
| Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
191191
| Ipause
192192
| Ilfence | Isfence | Imfence
193193
| Iprefetch _ | Ibswap _)

backend/amd64/selection.ml

Lines changed: 13 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ let pseudoregs_for_operation op arg res =
173173
|Ipopcnt|Iclz _|Ictz _), _)
174174
| Ispecific (Isextend32|Izextend32|Ilea _|Istore_int (_, _, _)
175175
|Ipause|Ilfence|Isfence|Imfence
176-
|Ioffset_loc (_, _)|Ifloatsqrtf _|Irdtsc|Iprefetch _)
176+
|Ioffset_loc (_, _)|Irdtsc|Iprefetch _)
177177
| Imove|Ispill|Ireload|Ivalueofint|Iintofvalue
178178
| Ivectorcast _ | Iscalarcast _
179179
| Iconst_int _|Iconst_float32 _|Iconst_float _|Iconst_vec128 _
@@ -298,53 +298,19 @@ method! select_operation op args dbg =
298298
self#select_floatarith true width Imulf Ifloatmul args
299299
| Cdivf width ->
300300
self#select_floatarith false width Idivf Ifloatdiv args
301-
(* Special cases overriding C implementations. *)
302-
| Cextcall { func = "sqrt"; alloc = false; } ->
303-
begin match args with
304-
[Cop(Cload { memory_chunk = Double as chunk; _}, [loc], _dbg)] ->
305-
let (addr, arg) = self#select_addressing chunk loc in
306-
(Ispecific(Ifloatsqrtf (Float64, addr)), [arg])
307-
| [arg] ->
308-
(Ispecific Simd.(Isimd (SSE2 Sqrt_scalar_f64)), [arg])
309-
| _ ->
310-
assert false
311-
end
312-
| Cextcall { func = "caml_int64_bits_of_float_unboxed"; alloc = false;
313-
ty = [|Int|]; ty_args = [XFloat] } ->
314-
(match args with
315-
| [Cop(Cload { memory_chunk = Double; mutability = mut; is_atomic }, [loc], _dbg)] ->
316-
let c = Word_int in
317-
let (addr, arg) = self#select_addressing c loc in
318-
Iload { memory_chunk = c;
319-
addressing_mode = addr;
320-
mutability = mut;
321-
is_atomic; }, [arg]
322-
| _ -> Imove, args)
323-
| Cextcall { func = "caml_int64_float_of_bits_unboxed"; alloc = false;
324-
ty = [|Float|]; ty_args = [XInt64] } ->
325-
(match args with
326-
| [Cop(Cload { memory_chunk = Word_int; mutability = mut; is_atomic }, [loc], _dbg)] ->
327-
let c = Double in
328-
let (addr, arg) = self#select_addressing c loc in
329-
Iload { memory_chunk = c;
330-
addressing_mode = addr;
331-
mutability = mut;
332-
is_atomic; }, [arg]
333-
| _ -> Imove, args)
301+
(* Special cases overriding C implementations (regardless of [@@builtin]). *)
302+
| Cextcall { func = ("sqrt" as func); _ }
303+
| Cextcall { func = ("caml_int64_bits_of_float_unboxed" as func); _ }
304+
| Cextcall { func = ("caml_int64_float_of_bits_unboxed" as func); _ }
334305
(* x86 intrinsics ([@@builtin]) *)
335-
(* CR mslater: (float32) casting/sqrt intrinsics *)
336-
| Cextcall { func; builtin = true; ty = ret; ty_args = _; } ->
337-
begin match func, ret with
338-
| "caml_rdtsc_unboxed", [|Int|] -> Ispecific Irdtsc, args
339-
| "caml_rdpmc_unboxed", [|Int|] -> Ispecific Irdpmc, args
340-
| "caml_pause_hint", ([|Val|] | [| |]) ->
341-
Ispecific Ipause, args
342-
| "caml_load_fence", ([|Val|] | [| |]) ->
343-
Ispecific Ilfence, args
344-
| "caml_store_fence", ([|Val|] | [| |]) ->
345-
Ispecific Isfence, args
346-
| "caml_memory_fence", ([|Val|] | [| |]) ->
347-
Ispecific Imfence, args
306+
| Cextcall { func; builtin = true; _ } ->
307+
begin match func with
308+
| "caml_rdtsc_unboxed" -> Ispecific Irdtsc, args
309+
| "caml_rdpmc_unboxed" -> Ispecific Irdpmc, args
310+
| "caml_pause_hint" -> Ispecific Ipause, args
311+
| "caml_load_fence" -> Ispecific Ilfence, args
312+
| "caml_store_fence" -> Ispecific Isfence, args
313+
| "caml_memory_fence" -> Ispecific Imfence, args
348314
| _ ->
349315
(match Simd_selection.select_operation func args with
350316
| Some (op, args) -> op, args

0 commit comments

Comments
 (0)