fixes

TheNumbat · TheNumbat · commit b5718d283ae1 · 2024-05-02T14:04:47.000-04:00
diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp
@@ -786,7 +786,7 @@ let move (src : Reg.t) (dst : Reg.t) =
     begin match src.typ, src.loc, dst.typ, dst.loc with
     | Float, Reg _, Float, Reg _
     | Float32, Reg _, Float32, Reg _
-    | Vec128, _, Vec128, _  (* Vec128 stack slots are always aligned. *) ->
+    | Vec128, _, Vec128, _ (* Vec128 stack slots are always aligned. *) ->
       I.movapd (reg src) (reg dst)
     | Float, _, Float, _ ->
       I.movsd (reg src) (reg dst)
@@ -796,8 +796,9 @@ let move (src : Reg.t) (dst : Reg.t) =
       I.mov (reg src) (reg dst)
     | _ ->
       Misc.fatal_errorf
-        "Illegal move between registers of different types (%s to %s)\n"
-        (Reg.name src) (Reg.name dst)
+        "Illegal move between registers of differing types (%s:%a to %s:%a)\n"
+        (Reg.name src) Printcmm.machtype_component src.typ
+        (Reg.name dst) Printcmm.machtype_component dst.typ
     end
 
 let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
@@ -1103,7 +1104,9 @@ let emit_simd_instr op i =
   | SSE2 I32_to_unsigned_i16 -> I.packusdw (arg i 1) (res i 0)
   | SSE2 Cast_scalar_f64_i64 -> I.cvtsd2si (arg i 0) (res i 0)
   | SSE2 Bit_cast_f64_i64 -> I.movq (arg i 0) (res i 0)
-  | SSE2 Bit_cast_f32_i32 -> I.movd (arg i 0) (res i 0)
+  | SSE2 Bit_cast_i64_f64 -> I.movq (arg i 0) (res i 0)
+  | SSE2 Bit_cast_f32_i32 -> I.movd (arg i 0) (res32 i 0)
+  | SSE2 Bit_cast_i32_f32 -> I.movd (arg32 i 0) (res i 0)
   | SSE2 SLL_i16 -> I.psllw (arg i 1) (res i 0)
   | SSE2 SLL_i32 -> I.pslld (arg i 1) (res i 0)
   | SSE2 SLL_i64 -> I.psllq (arg i 1) (res i 0)
diff --git a/backend/amd64/simd.ml b/backend/amd64/simd.ml
@@ -88,6 +88,8 @@ type sse_operation =
 type sse2_operation =
   | Bit_cast_f64_i64
   | Bit_cast_f32_i32
+  | Bit_cast_i64_f64
+  | Bit_cast_i32_f32
   | Sqrt_scalar_f64
   | Sqrt_scalar_f32
   | Cast_scalar_f64_i64
@@ -323,6 +325,8 @@ let equal_operation_sse2 l r =
   | Sqrt_scalar_f32, Sqrt_scalar_f32
   | Bit_cast_f64_i64, Bit_cast_f64_i64
   | Bit_cast_f32_i32, Bit_cast_f32_i32
+  | Bit_cast_i64_f64, Bit_cast_i64_f64
+  | Bit_cast_i32_f32, Bit_cast_i32_f32
   | Sqrt_f64, Sqrt_f64
   | Add_i8, Add_i8
   | Add_i16, Add_i16
@@ -412,25 +416,26 @@ let equal_operation_sse2 l r =
   | Cmp_f64 l, Cmp_f64 r when float_condition_equal l r -> true
   | ( ( Add_i8 | Add_i16 | Add_i32 | Add_i64 | Add_f64 | Min_scalar_f64
       | Max_scalar_f64 | Cast_scalar_f64_i64 | Bit_cast_f64_i64
-      | Bit_cast_f32_i32 | Sqrt_scalar_f64 | Sqrt_scalar_f32 | Sqrt_f64
-      | Add_saturating_unsigned_i8 | Add_saturating_unsigned_i16
-      | Add_saturating_i8 | Add_saturating_i16 | Sub_i8 | Sub_i16 | Sub_i32
-      | Sub_i64 | Sub_f64 | Sub_saturating_unsigned_i8
-      | Sub_saturating_unsigned_i16 | Sub_saturating_i8 | Sub_saturating_i16
-      | Max_unsigned_i8 | Max_i16 | Max_f64 | Min_unsigned_i8 | Min_i16
-      | Min_f64 | Mul_f64 | Div_f64 | And_bits | Andnot_bits | Or_bits
-      | Xor_bits | Movemask_8 | Movemask_64 | Cmpeq_i8 | Cmpeq_i16 | Cmpeq_i32
-      | Cmpgt_i8 | Cmpgt_i16 | Cmpgt_i32 | I32_to_f64 | I32_to_f32 | F64_to_i32
-      | F64_to_f32 | F32_to_i32 | F32_to_f64 | SLL_i16 | SLL_i32 | SLL_i64
-      | SRL_i16 | SRL_i32 | SRL_i64 | SRA_i16 | SRA_i32 | I16_to_i8 | I32_to_i16
-      | I16_to_unsigned_i8 | I32_to_unsigned_i16 | Avg_unsigned_i8
-      | Avg_unsigned_i16 | SAD_unsigned_i8 | Interleave_high_8
-      | Interleave_high_16 | Interleave_high_64 | Interleave_low_8
-      | Interleave_low_16 | Interleave_low_64 | SLLi_i16 _ | SLLi_i32 _
-      | SLLi_i64 _ | SRLi_i16 _ | SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _
-      | SRAi_i32 _ | Shift_left_bytes _ | Shift_right_bytes _ | Cmp_f64 _
-      | Shuffle_64 _ | Shuffle_high_16 _ | Shuffle_low_16 _ | Mulhi_i16
-      | Mulhi_unsigned_i16 | Mullo_i16 | Mul_hadd_i16_to_i32 ),
+      | Bit_cast_f32_i32 | Bit_cast_i64_f64 | Bit_cast_i32_f32 | Sqrt_scalar_f64
+      | Sqrt_scalar_f32 | Sqrt_f64 | Add_saturating_unsigned_i8
+      | Add_saturating_unsigned_i16 | Add_saturating_i8 | Add_saturating_i16
+      | Sub_i8 | Sub_i16 | Sub_i32 | Sub_i64 | Sub_f64
+      | Sub_saturating_unsigned_i8 | Sub_saturating_unsigned_i16
+      | Sub_saturating_i8 | Sub_saturating_i16 | Max_unsigned_i8 | Max_i16
+      | Max_f64 | Min_unsigned_i8 | Min_i16 | Min_f64 | Mul_f64 | Div_f64
+      | And_bits | Andnot_bits | Or_bits | Xor_bits | Movemask_8 | Movemask_64
+      | Cmpeq_i8 | Cmpeq_i16 | Cmpeq_i32 | Cmpgt_i8 | Cmpgt_i16 | Cmpgt_i32
+      | I32_to_f64 | I32_to_f32 | F64_to_i32 | F64_to_f32 | F32_to_i32
+      | F32_to_f64 | SLL_i16 | SLL_i32 | SLL_i64 | SRL_i16 | SRL_i32 | SRL_i64
+      | SRA_i16 | SRA_i32 | I16_to_i8 | I32_to_i16 | I16_to_unsigned_i8
+      | I32_to_unsigned_i16 | Avg_unsigned_i8 | Avg_unsigned_i16
+      | SAD_unsigned_i8 | Interleave_high_8 | Interleave_high_16
+      | Interleave_high_64 | Interleave_low_8 | Interleave_low_16
+      | Interleave_low_64 | SLLi_i16 _ | SLLi_i32 _ | SLLi_i64 _ | SRLi_i16 _
+      | SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _ | SRAi_i32 _ | Shift_left_bytes _
+      | Shift_right_bytes _ | Cmp_f64 _ | Shuffle_64 _ | Shuffle_high_16 _
+      | Shuffle_low_16 _ | Mulhi_i16 | Mulhi_unsigned_i16 | Mullo_i16
+      | Mul_hadd_i16_to_i32 ),
       _ ) ->
     false
 
@@ -710,6 +715,8 @@ let print_operation_sse2 printreg op ppf arg =
   | Cast_scalar_f64_i64 -> fprintf ppf "cast_scalar_f64_i64 %a" printreg arg.(0)
   | Bit_cast_f32_i32 -> fprintf ppf "bit_cast_f32_i32 %a" printreg arg.(0)
   | Bit_cast_f64_i64 -> fprintf ppf "bit_cast_f64_i64 %a" printreg arg.(0)
+  | Bit_cast_i32_f32 -> fprintf ppf "bit_cast_i32_f32 %a" printreg arg.(0)
+  | Bit_cast_i64_f64 -> fprintf ppf "bit_cast_i64_f64 %a" printreg arg.(0)
   | I32_to_f64 -> fprintf ppf "i32_to_f64 %a" printreg arg.(0)
   | I32_to_f32 -> fprintf ppf "i32_to_f32 %a" printreg arg.(0)
   | F64_to_i32 -> fprintf ppf "f64_to_i32 %a" printreg arg.(0)
@@ -920,9 +927,9 @@ let class_of_operation_sse = function
 
 let class_of_operation_sse2 = function
   | Add_i8 | Add_i16 | Add_i32 | Add_i64 | Add_f64 | Add_saturating_i8
-  | Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | Min_scalar_f64
-  | Max_scalar_f64 | Sqrt_scalar_f64 | Sqrt_scalar_f32 | Sqrt_f64
-  | Add_saturating_i16 | Add_saturating_unsigned_i8
+  | Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | Bit_cast_i64_f64
+  | Bit_cast_i32_f32 | Min_scalar_f64 | Max_scalar_f64 | Sqrt_scalar_f64
+  | Sqrt_scalar_f32 | Sqrt_f64 | Add_saturating_i16 | Add_saturating_unsigned_i8
   | Add_saturating_unsigned_i16 | Sub_i8 | Sub_i16 | Sub_i32 | Sub_i64 | Sub_f64
   | Sub_saturating_i8 | Sub_saturating_i16 | Sub_saturating_unsigned_i8
   | Sub_saturating_unsigned_i16 | Max_unsigned_i8 | Max_i16 | Max_f64
diff --git a/backend/amd64/simd_proc.ml b/backend/amd64/simd_proc.ml
@@ -63,14 +63,14 @@ let register_behavior_sse2 = function
   | Mullo_i16 | Mul_hadd_i16_to_i32 ->
     R_RM_to_fst
   | Shuffle_high_16 _ | Shuffle_low_16 _ | I32_to_f64 | I32_to_f32 | F64_to_i32
-  | Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | F64_to_f32
-  | F32_to_i32 | F32_to_f64 | Sqrt_f64 ->
+  | Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | Bit_cast_i64_f64
+  | Bit_cast_i32_f32 | F64_to_f32 | F32_to_i32 | F32_to_f64 | Sqrt_f64
+  | Sqrt_scalar_f64 | Sqrt_scalar_f32 ->
     RM_to_R
   | SLLi_i16 _ | SLLi_i32 _ | SLLi_i64 _ | SRLi_i16 _ | SRLi_i32 _ | SRLi_i64 _
   | SRAi_i16 _ | SRAi_i32 _ | Shift_left_bytes _ | Shift_right_bytes _ ->
     R_to_fst
   | Movemask_8 | Movemask_64 -> R_to_R
-  | Sqrt_scalar_f64 | Sqrt_scalar_f32 -> (* Backwards compatibility *) R_to_R
 
 let register_behavior_sse3 = function
   | Addsub_f32 | Addsub_f64 | Hadd_f32 | Hadd_f64 | Hsub_f32 | Hsub_f64 ->
diff --git a/backend/amd64/simd_selection.ml b/backend/amd64/simd_selection.ml
@@ -102,10 +102,10 @@ let select_operation_sse op args =
 
 let select_operation_sse2 op args =
   match op with
-  | "caml_int64_bits_of_float_unboxed" | "caml_int64_bits_to_float_unboxed" ->
-    Some (Bit_cast_f64_i64, args)
-  | "caml_float32_of_bits" | "caml_float32_to_bits" ->
-    Some (Bit_cast_f32_i32, args)
+  | "caml_int64_bits_of_float_unboxed" -> Some (Bit_cast_f64_i64, args)
+  | "caml_int64_bits_to_float_unboxed" -> Some (Bit_cast_i64_f64, args)
+  | "caml_float32_of_bits" -> Some (Bit_cast_i32_f32, args)
+  | "caml_float32_to_bits" -> Some (Bit_cast_f32_i32, args)
   | "caml_sse2_float64_sqrt" | "sqrt" -> Some (Sqrt_scalar_f64, args)
   | "caml_sse2_float32_sqrt" | "sqrtf" -> Some (Sqrt_scalar_f32, args)
   | "caml_sse2_float64_max" -> Some (Max_scalar_f64, args)
diff --git a/backend/selectgen.ml b/backend/selectgen.ml
@@ -171,7 +171,10 @@ let oper_result_type = function
     Ccmpi _ | Ccmpa _ | Ccmpf _ -> typ_int
   | Caddv -> typ_val
   | Cadda -> typ_addr
-  | Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _ -> typ_float
+  | Cnegf Float64 | Cabsf Float64 | Caddf Float64
+  | Csubf Float64 | Cmulf Float64 | Cdivf Float64 -> typ_float
+  | Cnegf Float32 | Cabsf Float32 | Caddf Float32
+  | Csubf Float32 | Cmulf Float32 | Cdivf Float32 -> typ_float32
   | Ccsel ty -> ty
   | Cvalueofint -> typ_val
   | Cintofvalue -> typ_int