ocaml-flambda
diff --git a/backend/amd64/CSE.ml b/backend/amd64/CSE.ml
Lines changed: 2 additions & 2 deletions
diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml
Lines changed: 4 additions & 15 deletions
diff --git a/backend/amd64/arch.mli b/backend/amd64/arch.mli
Lines changed: 0 additions & 2 deletions
diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp
Lines changed: 15 additions & 25 deletions
diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml
Lines changed: 4 additions & 5 deletions
diff --git a/backend/amd64/regalloc_stack_operands.ml b/backend/amd64/regalloc_stack_operands.ml
Lines changed: 1 addition & 1 deletion
diff --git a/backend/amd64/reload.ml b/backend/amd64/reload.ml
Lines changed: 1 addition & 1 deletion
diff --git a/backend/amd64/selection.ml b/backend/amd64/selection.ml
Lines changed: 13 additions & 47 deletions
@@ -32,7 +32,7 @@ method! class_of_operation op =
     | Ilea _ | Isextend32 | Izextend32 -> Op_pure
     | Istore_int(_, _, is_asg) -> Op_store is_asg
     | Ioffset_loc(_, _) -> Op_store true
-    | Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable
+    | Ifloatarithmem _ -> Op_load Mutable
     | Ibswap _ -> super#class_of_operation op
     | Irdtsc | Irdpmc
     | Ilfence | Isfence | Imfence -> Op_other
@@ -75,7 +75,7 @@ class cfg_cse = object
     | Ilea _ | Isextend32 | Izextend32 -> Op_pure
     | Istore_int(_, _, is_asg) -> Op_store is_asg
     | Ioffset_loc(_, _) -> Op_store true
-    | Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load Mutable
+    | Ifloatarithmem _ -> Op_load Mutable
     | Ibswap _ -> super#class_of_operation op
     | Irdtsc | Irdpmc
     | Ilfence | Isfence | Imfence -> Op_other
 
@@ -149,8 +149,6 @@ type specific_operation =
                                        (* Add a constant to a location *)
   | Ifloatarithmem of float_width * float_operation * addressing_mode
                                        (* Float arith operation with memory *)
-  | Ifloatsqrtf of float_width * addressing_mode
-                                       (* Float square root from memory *)
   | Ibswap of { bitwidth: bswap_bitwidth; } (* endianness conversion *)
   | Isextend32                         (* 32 to 64 bit conversion with sign
                                           extension *)
@@ -251,12 +249,6 @@ let print_specific_operation printreg op ppf arg =
          (if is_assign then "(assign)" else "(init)")
   | Ioffset_loc(n, addr) ->
       fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
-  | Ifloatsqrtf (Float64, addr) ->
-     fprintf ppf "sqrtf float64[%a]"
-             (print_addressing printreg addr) [|arg.(0)|]
-  | Ifloatsqrtf (Float32, addr) ->
-     fprintf ppf "sqrtf float32[%a]"
-             (print_addressing printreg addr) [|arg.(0)|]
   | Ifloatarithmem(width, op, addr) ->
       let op_name = match width, op with
       | Float64, Ifloatadd -> "+f"
@@ -305,7 +297,7 @@ let win64 =
 
 let operation_is_pure = function
   | Ilea _ | Ibswap _ | Isextend32 | Izextend32
-  | Ifloatarithmem _ | Ifloatsqrtf _ -> true
+  | Ifloatarithmem _  -> true
   | Irdtsc | Irdpmc | Ipause
   | Ilfence | Isfence | Imfence
   | Istore_int (_, _, _) | Ioffset_loc (_, _)
@@ -316,15 +308,15 @@ let operation_is_pure = function
 
 let operation_can_raise = function
   | Ilea _ | Ibswap _ | Isextend32 | Izextend32
-  | Ifloatarithmem _ | Ifloatsqrtf _
+  | Ifloatarithmem _
   | Irdtsc | Irdpmc | Ipause | Isimd _
   | Ilfence | Isfence | Imfence
   | Istore_int (_, _, _) | Ioffset_loc (_, _)
   | Iprefetch _ -> false
 
 let operation_allocates = function
   | Ilea _ | Ibswap _ | Isextend32 | Izextend32
-  | Ifloatarithmem _ | Ifloatsqrtf _
+  | Ifloatarithmem _
   | Irdtsc | Irdpmc | Ipause | Isimd _
   | Ilfence | Isfence | Imfence
   | Istore_int (_, _, _) | Ioffset_loc (_, _)
@@ -393,9 +385,6 @@ let equal_specific_operation left right =
     equal_addressing_mode x' y'
   | Ibswap { bitwidth = left }, Ibswap { bitwidth = right } ->
     Int.equal (int_of_bswap_bitwidth left) (int_of_bswap_bitwidth right)
-  | Ifloatsqrtf (left_w, left), Ifloatsqrtf (right_w, right) ->
-    Cmm.equal_float_width left_w right_w &&
-    equal_addressing_mode left right
   | Isextend32, Isextend32 ->
     true
   | Izextend32, Izextend32 ->
@@ -418,7 +407,7 @@ let equal_specific_operation left right =
     && equal_addressing_mode left_addr right_addr
   | Isimd l, Isimd r ->
     Simd.equal_operation l r
-  | (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ifloatsqrtf _ | Ibswap _ |
+  | (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ |
      Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence |
      Ipause | Isimd _ | Iprefetch _), _ ->
     false
@@ -77,8 +77,6 @@ type specific_operation =
   | Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
   | Ifloatarithmem of float_width * float_operation * addressing_mode
                                        (* Float arith operation with memory *)
-  | Ifloatsqrtf of float_width * addressing_mode
-                                       (* Float square root from memory *)
   | Ibswap of { bitwidth: bswap_bitwidth; } (* endianness conversion *)
   | Isextend32                         (* 32 to 64 bit conversion with sign
                                           extension *)
 
@@ -801,33 +801,21 @@ let emit_global_label s =
 let move (src : Reg.t) (dst : Reg.t) =
   if src.loc <> dst.loc then
     begin match src.typ, src.loc, dst.typ, dst.loc with
-    | (Float | Float32), Reg.Reg _, (Float | Float32), Reg.Reg _
-    | Vec128, _, Vec128, _ ->
-      (* Vec128 stack slots are always aligned. *)
+    | Float, Reg _, Float, Reg _
+    | Float32, Reg _, Float32, Reg _
+    | Vec128, _, Vec128, _ (* Vec128 stack slots are always aligned. *) ->
       I.movapd (reg src) (reg dst)
     | Float, _, Float, _ ->
       I.movsd (reg src) (reg dst)
     | Float32, _, Float32, _ ->
       I.movss (reg src) (reg dst)
-    | Float, _, Int, _ | Int, _, Float, _ ->
-      (* CR-soon gyorsh: this case is used by the bits_of_float/float_of_bits intrinsics.
-         They should instead generate a separate Ispecific and this case should be
-         removed. *)
-      I.movq (reg src) (reg dst)
     | (Int | Val | Addr), _, (Int | Val | Addr), _ ->
       I.mov (reg src) (reg dst)
-    | Vec128, _, _, _ | _, _, Vec128, _ ->
+    | (Float | Float32 | Vec128 | Int | Val | Addr), _, _, _ ->
       Misc.fatal_errorf
-        "Illegal move between a vector and non-vector register (%s to %s)\n"
-        (Reg.name src) (Reg.name dst)
-    | Float32, _, _, _ | _, _, Float32, _ ->
-      Misc.fatal_errorf
-        "Illegal move between a float32 and non-float32 register (%s to %s)\n"
-        (Reg.name src) (Reg.name dst)
-    | Float, _, (Val | Addr), _ | (Val | Addr), _, Float, _ ->
-      Misc.fatal_errorf
-        "Illegal move between a float and val/addr register (%s to %s)\n"
-        (Reg.name src) (Reg.name dst)
+        "Illegal move between registers of differing types (%s:%a to %s:%a)\n"
+        (Reg.name src) Printcmm.machtype_component src.typ
+        (Reg.name dst) Printcmm.machtype_component dst.typ
     end
 
 let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
@@ -1068,6 +1056,10 @@ let emit_simd_instr op i =
     if arg i 0 <> res i 0 then
       I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
     I.sqrtsd (arg i 0) (res i 0)
+  | SSE2 Sqrt_scalar_f32 ->
+    if arg i 0 <> res i 0 then
+      I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
+    I.sqrtss (arg i 0) (res i 0)
   | SSE2 Sqrt_f64 -> I.sqrtpd (arg i 0) (res i 0)
   | SSE2 Add_i8 -> I.paddb (arg i 1) (res i 0)
   | SSE2 Add_i16 -> I.paddw (arg i 1) (res i 0)
@@ -1128,6 +1120,10 @@ let emit_simd_instr op i =
   | SSE2 I16_to_unsigned_i8 -> I.packuswb (arg i 1) (res i 0)
   | SSE2 I32_to_unsigned_i16 -> I.packusdw (arg i 1) (res i 0)
   | SSE2 Cast_scalar_f64_i64 -> I.cvtsd2si (arg i 0) (res i 0)
+  | SSE2 Bit_cast_f64_i64 -> I.movq (arg i 0) (res i 0)
+  | SSE2 Bit_cast_i64_f64 -> I.movq (arg i 0) (res i 0)
+  | SSE2 Bit_cast_f32_i32 -> I.movd (arg i 0) (res32 i 0)
+  | SSE2 Bit_cast_i32_f32 -> I.movd (arg32 i 0) (res i 0)
   | SSE2 SLL_i16 -> I.psllw (arg i 1) (res i 0)
   | SSE2 SLL_i32 -> I.pslld (arg i 1) (res i 0)
   | SSE2 SLL_i64 -> I.psllq (arg i 1) (res i 0)
@@ -1636,12 +1632,6 @@ let emit_instr ~first ~fallthrough i =
       I.bswap (res32 i 0);
   | Lop(Ispecific(Ibswap { bitwidth = Sixtyfour })) ->
       I.bswap (res i 0)
-  | Lop(Ispecific(Ifloatsqrtf (Float64, addr))) ->
-      I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
-      I.sqrtsd (addressing addr REAL8 i 0) (res i 0)
-  | Lop(Ispecific(Ifloatsqrtf (Float32, _addr))) ->
-      (* CR mslater: (float32) Ifloatsqrtf Float32 *)
-      Misc.fatal_error "Ifloatsqrtf Float32 should never be generated."
   | Lop(Ispecific(Isextend32)) ->
       I.movsxd (arg32 i 0) (res i 0)
   | Lop(Ispecific(Izextend32)) ->
 
@@ -488,7 +488,7 @@ let destroyed_at_oper = function
   | Iop(Ispecific(Isextend32 | Izextend32 | Ilea _
                  | Istore_int (_, _, _) | Ioffset_loc (_, _)
                  | Ipause | Iprefetch _
-                 | Ifloatarithmem (_, _, _) | Ifloatsqrtf (_, _) | Ibswap _))
+                 | Ifloatarithmem (_, _, _) | Ibswap _))
   | Iop(Iintop(Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
               | Ipopcnt | Iclz _ | Ictz _ ))
   | Iop(Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl
@@ -563,7 +563,7 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
        | Begin_region
        | End_region
        | Specific (Ilea _ | Istore_int _ | Ioffset_loc _
-                  | Ifloatarithmem _ | Ifloatsqrtf _ | Ibswap _
+                  | Ifloatarithmem _ | Ibswap _
                   | Isextend32 | Izextend32 | Ipause
                   | Iprefetch _ | Ilfence | Isfence | Imfence)
        | Name_for_debugger _ | Dls_get)
@@ -591,7 +591,7 @@ let destroyed_at_terminator (terminator : Cfg_intf.S.terminator) =
   | Call {op = Indirect | Direct _; _} ->
     all_phys_regs ()
   | Specific_can_raise { op = (Ilea _ | Ibswap _ | Isextend32 | Izextend32
-                       | Ifloatarithmem _ | Ifloatsqrtf _ | Irdtsc | Irdpmc | Ipause
+                       | Ifloatarithmem _ | Irdtsc | Irdpmc | Ipause
                        | Isimd _ | Ilfence | Isfence | Imfence
                        | Istore_int (_, _, _) | Ioffset_loc (_, _)
                        | Iprefetch _); _ } ->
@@ -621,7 +621,7 @@ let is_destruction_point ~(more_destruction_points : bool) (terminator : Cfg_int
   | Call {op = Indirect | Direct _; _} ->
     true
   | Specific_can_raise { op = (Ilea _ | Ibswap _ | Isextend32 | Izextend32
-                       | Ifloatarithmem _ | Ifloatsqrtf _ | Irdtsc | Irdpmc | Ipause
+                       | Ifloatarithmem _ | Irdtsc | Irdpmc | Ipause
                        | Isimd _ | Ilfence | Isfence | Imfence
                        | Istore_int (_, _, _) | Ioffset_loc (_, _)
                        | Iprefetch _); _ } ->
@@ -701,7 +701,6 @@ let max_register_pressure =
              | Irdtsc | Irdpmc | Istore_int (_, _, _)
              | Ilfence | Isfence | Imfence
              | Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
-             | Ifloatsqrtf (_, _)
              | Ibswap _)
   | Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque
   | Ibeginregion | Iendregion | Idls_get
 
@@ -236,7 +236,7 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
                  | Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
                  | Ipause
                  | Iprefetch _
-                 | Ibswap _ | Ifloatsqrtf _))
+                 | Ibswap _))
   | Reloadretaddr
   | Pushtrap _
   | Poptrap
 
@@ -187,7 +187,7 @@ method! reload_operation op arg res =
   | Iintop_atomic _
   | Ispecific  (Isextend32 | Izextend32 | Ilea _
                | Istore_int (_, _, _)
-               | Ioffset_loc (_, _) | Ifloatarithmem (_, _, _) | Ifloatsqrtf _
+               | Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
                | Ipause
                | Ilfence | Isfence | Imfence
                | Iprefetch _ | Ibswap _)
 
@@ -173,7 +173,7 @@ let pseudoregs_for_operation op arg res =
                 |Ipopcnt|Iclz _|Ictz _), _)
   | Ispecific (Isextend32|Izextend32|Ilea _|Istore_int (_, _, _)
               |Ipause|Ilfence|Isfence|Imfence
-              |Ioffset_loc (_, _)|Ifloatsqrtf _|Irdtsc|Iprefetch _)
+              |Ioffset_loc (_, _)|Irdtsc|Iprefetch _)
   | Imove|Ispill|Ireload|Ivalueofint|Iintofvalue
   | Ivectorcast _ | Iscalarcast _
   | Iconst_int _|Iconst_float32 _|Iconst_float _|Iconst_vec128 _
@@ -298,53 +298,19 @@ method! select_operation op args dbg =
       self#select_floatarith true width Imulf Ifloatmul args
   | Cdivf width ->
       self#select_floatarith false width Idivf Ifloatdiv args
-  (* Special cases overriding C implementations. *)
-  | Cextcall { func = "sqrt"; alloc = false; } ->
-     begin match args with
-       [Cop(Cload { memory_chunk = Double as chunk; _}, [loc], _dbg)] ->
-         let (addr, arg) = self#select_addressing chunk loc in
-         (Ispecific(Ifloatsqrtf (Float64, addr)), [arg])
-     | [arg] ->
-         (Ispecific Simd.(Isimd (SSE2 Sqrt_scalar_f64)), [arg])
-     | _ ->
-         assert false
-    end
-  | Cextcall { func = "caml_int64_bits_of_float_unboxed"; alloc = false;
-               ty = [|Int|]; ty_args = [XFloat] } ->
-      (match args with
-        | [Cop(Cload { memory_chunk = Double; mutability = mut; is_atomic }, [loc], _dbg)] ->
-        let c = Word_int in
-        let (addr, arg) = self#select_addressing c loc in
-        Iload { memory_chunk = c;
-                addressing_mode = addr;
-                mutability = mut;
-                is_atomic; }, [arg]
-      | _ -> Imove, args)
-  | Cextcall { func = "caml_int64_float_of_bits_unboxed"; alloc = false;
-               ty = [|Float|]; ty_args = [XInt64] } ->
-      (match args with
-      | [Cop(Cload { memory_chunk = Word_int; mutability = mut; is_atomic }, [loc], _dbg)] ->
-        let c = Double in
-        let (addr, arg) = self#select_addressing c loc in
-        Iload { memory_chunk = c;
-                addressing_mode = addr;
-                mutability = mut;
-                is_atomic; }, [arg]
-      | _ -> Imove, args)
+  (* Special cases overriding C implementations (regardless of [@@builtin]). *)
+  | Cextcall { func = ("sqrt" as func); _ }
+  | Cextcall { func = ("caml_int64_bits_of_float_unboxed" as func); _ }
+  | Cextcall { func = ("caml_int64_float_of_bits_unboxed" as func); _ }
   (* x86 intrinsics ([@@builtin]) *)
-  (* CR mslater: (float32) casting/sqrt intrinsics *)
-  | Cextcall { func; builtin = true; ty = ret; ty_args = _; } ->
-      begin match func, ret with
-      | "caml_rdtsc_unboxed", [|Int|] -> Ispecific Irdtsc, args
-      | "caml_rdpmc_unboxed", [|Int|] -> Ispecific Irdpmc, args
-      | "caml_pause_hint", ([|Val|] | [| |]) ->
-         Ispecific Ipause, args
-      | "caml_load_fence", ([|Val|] | [| |]) ->
-         Ispecific Ilfence, args
-      | "caml_store_fence", ([|Val|] | [| |]) ->
-         Ispecific Isfence, args
-      | "caml_memory_fence", ([|Val|] | [| |]) ->
-         Ispecific Imfence, args
+  | Cextcall { func; builtin = true; _ } ->
+      begin match func with
+      | "caml_rdtsc_unboxed" -> Ispecific Irdtsc, args
+      | "caml_rdpmc_unboxed" -> Ispecific Irdpmc, args
+      | "caml_pause_hint" -> Ispecific Ipause, args
+      | "caml_load_fence" -> Ispecific Ilfence, args
+      | "caml_store_fence" -> Ispecific Isfence, args
+      | "caml_memory_fence" -> Ispecific Imfence, args
       | _ ->
         (match Simd_selection.select_operation func args with
          | Some (op, args) -> op, args