Skip to content

Commit b5718d2

Browse files
committed
fixes
1 parent 7a72101 commit b5718d2

File tree

5 files changed: 47 additions and 34 deletions.

backend/amd64/emit.mlp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -786,7 +786,7 @@ let move (src : Reg.t) (dst : Reg.t) =
786786
begin match src.typ, src.loc, dst.typ, dst.loc with
787787
| Float, Reg _, Float, Reg _
788788
| Float32, Reg _, Float32, Reg _
789-
| Vec128, _, Vec128, _ (* Vec128 stack slots are always aligned. *) ->
789+
| Vec128, _, Vec128, _ (* Vec128 stack slots are always aligned. *) ->
790790
I.movapd (reg src) (reg dst)
791791
| Float, _, Float, _ ->
792792
I.movsd (reg src) (reg dst)
@@ -796,8 +796,9 @@ let move (src : Reg.t) (dst : Reg.t) =
796796
I.mov (reg src) (reg dst)
797797
| _ ->
798798
Misc.fatal_errorf
799-
"Illegal move between registers of different types (%s to %s)\n"
800-
(Reg.name src) (Reg.name dst)
799+
"Illegal move between registers of differing types (%s:%a to %s:%a)\n"
800+
(Reg.name src) Printcmm.machtype_component src.typ
801+
(Reg.name dst) Printcmm.machtype_component dst.typ
801802
end
802803

803804
let stack_to_stack_move (src : Reg.t) (dst : Reg.t) =
@@ -1103,7 +1104,9 @@ let emit_simd_instr op i =
11031104
| SSE2 I32_to_unsigned_i16 -> I.packusdw (arg i 1) (res i 0)
11041105
| SSE2 Cast_scalar_f64_i64 -> I.cvtsd2si (arg i 0) (res i 0)
11051106
| SSE2 Bit_cast_f64_i64 -> I.movq (arg i 0) (res i 0)
1106-
| SSE2 Bit_cast_f32_i32 -> I.movd (arg i 0) (res i 0)
1107+
| SSE2 Bit_cast_i64_f64 -> I.movq (arg i 0) (res i 0)
1108+
| SSE2 Bit_cast_f32_i32 -> I.movd (arg i 0) (res32 i 0)
1109+
| SSE2 Bit_cast_i32_f32 -> I.movd (arg32 i 0) (res i 0)
11071110
| SSE2 SLL_i16 -> I.psllw (arg i 1) (res i 0)
11081111
| SSE2 SLL_i32 -> I.pslld (arg i 1) (res i 0)
11091112
| SSE2 SLL_i64 -> I.psllq (arg i 1) (res i 0)

backend/amd64/simd.ml

Lines changed: 29 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,8 @@ type sse_operation =
8888
type sse2_operation =
8989
| Bit_cast_f64_i64
9090
| Bit_cast_f32_i32
91+
| Bit_cast_i64_f64
92+
| Bit_cast_i32_f32
9193
| Sqrt_scalar_f64
9294
| Sqrt_scalar_f32
9395
| Cast_scalar_f64_i64
@@ -323,6 +325,8 @@ let equal_operation_sse2 l r =
323325
| Sqrt_scalar_f32, Sqrt_scalar_f32
324326
| Bit_cast_f64_i64, Bit_cast_f64_i64
325327
| Bit_cast_f32_i32, Bit_cast_f32_i32
328+
| Bit_cast_i64_f64, Bit_cast_i64_f64
329+
| Bit_cast_i32_f32, Bit_cast_i32_f32
326330
| Sqrt_f64, Sqrt_f64
327331
| Add_i8, Add_i8
328332
| Add_i16, Add_i16
@@ -412,25 +416,26 @@ let equal_operation_sse2 l r =
412416
| Cmp_f64 l, Cmp_f64 r when float_condition_equal l r -> true
413417
| ( ( Add_i8 | Add_i16 | Add_i32 | Add_i64 | Add_f64 | Min_scalar_f64
414418
| Max_scalar_f64 | Cast_scalar_f64_i64 | Bit_cast_f64_i64
415-
| Bit_cast_f32_i32 | Sqrt_scalar_f64 | Sqrt_scalar_f32 | Sqrt_f64
416-
| Add_saturating_unsigned_i8 | Add_saturating_unsigned_i16
417-
| Add_saturating_i8 | Add_saturating_i16 | Sub_i8 | Sub_i16 | Sub_i32
418-
| Sub_i64 | Sub_f64 | Sub_saturating_unsigned_i8
419-
| Sub_saturating_unsigned_i16 | Sub_saturating_i8 | Sub_saturating_i16
420-
| Max_unsigned_i8 | Max_i16 | Max_f64 | Min_unsigned_i8 | Min_i16
421-
| Min_f64 | Mul_f64 | Div_f64 | And_bits | Andnot_bits | Or_bits
422-
| Xor_bits | Movemask_8 | Movemask_64 | Cmpeq_i8 | Cmpeq_i16 | Cmpeq_i32
423-
| Cmpgt_i8 | Cmpgt_i16 | Cmpgt_i32 | I32_to_f64 | I32_to_f32 | F64_to_i32
424-
| F64_to_f32 | F32_to_i32 | F32_to_f64 | SLL_i16 | SLL_i32 | SLL_i64
425-
| SRL_i16 | SRL_i32 | SRL_i64 | SRA_i16 | SRA_i32 | I16_to_i8 | I32_to_i16
426-
| I16_to_unsigned_i8 | I32_to_unsigned_i16 | Avg_unsigned_i8
427-
| Avg_unsigned_i16 | SAD_unsigned_i8 | Interleave_high_8
428-
| Interleave_high_16 | Interleave_high_64 | Interleave_low_8
429-
| Interleave_low_16 | Interleave_low_64 | SLLi_i16 _ | SLLi_i32 _
430-
| SLLi_i64 _ | SRLi_i16 _ | SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _
431-
| SRAi_i32 _ | Shift_left_bytes _ | Shift_right_bytes _ | Cmp_f64 _
432-
| Shuffle_64 _ | Shuffle_high_16 _ | Shuffle_low_16 _ | Mulhi_i16
433-
| Mulhi_unsigned_i16 | Mullo_i16 | Mul_hadd_i16_to_i32 ),
419+
| Bit_cast_f32_i32 | Bit_cast_i64_f64 | Bit_cast_i32_f32 | Sqrt_scalar_f64
420+
| Sqrt_scalar_f32 | Sqrt_f64 | Add_saturating_unsigned_i8
421+
| Add_saturating_unsigned_i16 | Add_saturating_i8 | Add_saturating_i16
422+
| Sub_i8 | Sub_i16 | Sub_i32 | Sub_i64 | Sub_f64
423+
| Sub_saturating_unsigned_i8 | Sub_saturating_unsigned_i16
424+
| Sub_saturating_i8 | Sub_saturating_i16 | Max_unsigned_i8 | Max_i16
425+
| Max_f64 | Min_unsigned_i8 | Min_i16 | Min_f64 | Mul_f64 | Div_f64
426+
| And_bits | Andnot_bits | Or_bits | Xor_bits | Movemask_8 | Movemask_64
427+
| Cmpeq_i8 | Cmpeq_i16 | Cmpeq_i32 | Cmpgt_i8 | Cmpgt_i16 | Cmpgt_i32
428+
| I32_to_f64 | I32_to_f32 | F64_to_i32 | F64_to_f32 | F32_to_i32
429+
| F32_to_f64 | SLL_i16 | SLL_i32 | SLL_i64 | SRL_i16 | SRL_i32 | SRL_i64
430+
| SRA_i16 | SRA_i32 | I16_to_i8 | I32_to_i16 | I16_to_unsigned_i8
431+
| I32_to_unsigned_i16 | Avg_unsigned_i8 | Avg_unsigned_i16
432+
| SAD_unsigned_i8 | Interleave_high_8 | Interleave_high_16
433+
| Interleave_high_64 | Interleave_low_8 | Interleave_low_16
434+
| Interleave_low_64 | SLLi_i16 _ | SLLi_i32 _ | SLLi_i64 _ | SRLi_i16 _
435+
| SRLi_i32 _ | SRLi_i64 _ | SRAi_i16 _ | SRAi_i32 _ | Shift_left_bytes _
436+
| Shift_right_bytes _ | Cmp_f64 _ | Shuffle_64 _ | Shuffle_high_16 _
437+
| Shuffle_low_16 _ | Mulhi_i16 | Mulhi_unsigned_i16 | Mullo_i16
438+
| Mul_hadd_i16_to_i32 ),
434439
_ ) ->
435440
false
436441

@@ -710,6 +715,8 @@ let print_operation_sse2 printreg op ppf arg =
710715
| Cast_scalar_f64_i64 -> fprintf ppf "cast_scalar_f64_i64 %a" printreg arg.(0)
711716
| Bit_cast_f32_i32 -> fprintf ppf "bit_cast_f32_i32 %a" printreg arg.(0)
712717
| Bit_cast_f64_i64 -> fprintf ppf "bit_cast_f64_i64 %a" printreg arg.(0)
718+
| Bit_cast_i32_f32 -> fprintf ppf "bit_cast_i32_f32 %a" printreg arg.(0)
719+
| Bit_cast_i64_f64 -> fprintf ppf "bit_cast_i64_f64 %a" printreg arg.(0)
713720
| I32_to_f64 -> fprintf ppf "i32_to_f64 %a" printreg arg.(0)
714721
| I32_to_f32 -> fprintf ppf "i32_to_f32 %a" printreg arg.(0)
715722
| F64_to_i32 -> fprintf ppf "f64_to_i32 %a" printreg arg.(0)
@@ -920,9 +927,9 @@ let class_of_operation_sse = function
920927

921928
let class_of_operation_sse2 = function
922929
| Add_i8 | Add_i16 | Add_i32 | Add_i64 | Add_f64 | Add_saturating_i8
923-
| Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | Min_scalar_f64
924-
| Max_scalar_f64 | Sqrt_scalar_f64 | Sqrt_scalar_f32 | Sqrt_f64
925-
| Add_saturating_i16 | Add_saturating_unsigned_i8
930+
| Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | Bit_cast_i64_f64
931+
| Bit_cast_i32_f32 | Min_scalar_f64 | Max_scalar_f64 | Sqrt_scalar_f64
932+
| Sqrt_scalar_f32 | Sqrt_f64 | Add_saturating_i16 | Add_saturating_unsigned_i8
926933
| Add_saturating_unsigned_i16 | Sub_i8 | Sub_i16 | Sub_i32 | Sub_i64 | Sub_f64
927934
| Sub_saturating_i8 | Sub_saturating_i16 | Sub_saturating_unsigned_i8
928935
| Sub_saturating_unsigned_i16 | Max_unsigned_i8 | Max_i16 | Max_f64

backend/amd64/simd_proc.ml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,14 +63,14 @@ let register_behavior_sse2 = function
6363
| Mullo_i16 | Mul_hadd_i16_to_i32 ->
6464
R_RM_to_fst
6565
| Shuffle_high_16 _ | Shuffle_low_16 _ | I32_to_f64 | I32_to_f32 | F64_to_i32
66-
| Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | F64_to_f32
67-
| F32_to_i32 | F32_to_f64 | Sqrt_f64 ->
66+
| Cast_scalar_f64_i64 | Bit_cast_f64_i64 | Bit_cast_f32_i32 | Bit_cast_i64_f64
67+
| Bit_cast_i32_f32 | F64_to_f32 | F32_to_i32 | F32_to_f64 | Sqrt_f64
68+
| Sqrt_scalar_f64 | Sqrt_scalar_f32 ->
6869
RM_to_R
6970
| SLLi_i16 _ | SLLi_i32 _ | SLLi_i64 _ | SRLi_i16 _ | SRLi_i32 _ | SRLi_i64 _
7071
| SRAi_i16 _ | SRAi_i32 _ | Shift_left_bytes _ | Shift_right_bytes _ ->
7172
R_to_fst
7273
| Movemask_8 | Movemask_64 -> R_to_R
73-
| Sqrt_scalar_f64 | Sqrt_scalar_f32 -> (* Backwards compatibility *) R_to_R
7474

7575
let register_behavior_sse3 = function
7676
| Addsub_f32 | Addsub_f64 | Hadd_f32 | Hadd_f64 | Hsub_f32 | Hsub_f64 ->

backend/amd64/simd_selection.ml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -102,10 +102,10 @@ let select_operation_sse op args =
102102

103103
let select_operation_sse2 op args =
104104
match op with
105-
| "caml_int64_bits_of_float_unboxed" | "caml_int64_bits_to_float_unboxed" ->
106-
Some (Bit_cast_f64_i64, args)
107-
| "caml_float32_of_bits" | "caml_float32_to_bits" ->
108-
Some (Bit_cast_f32_i32, args)
105+
| "caml_int64_bits_of_float_unboxed" -> Some (Bit_cast_f64_i64, args)
106+
| "caml_int64_bits_to_float_unboxed" -> Some (Bit_cast_i64_f64, args)
107+
| "caml_float32_of_bits" -> Some (Bit_cast_i32_f32, args)
108+
| "caml_float32_to_bits" -> Some (Bit_cast_f32_i32, args)
109109
| "caml_sse2_float64_sqrt" | "sqrt" -> Some (Sqrt_scalar_f64, args)
110110
| "caml_sse2_float32_sqrt" | "sqrtf" -> Some (Sqrt_scalar_f32, args)
111111
| "caml_sse2_float64_max" -> Some (Max_scalar_f64, args)

backend/selectgen.ml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,10 @@ let oper_result_type = function
171171
Ccmpi _ | Ccmpa _ | Ccmpf _ -> typ_int
172172
| Caddv -> typ_val
173173
| Cadda -> typ_addr
174-
| Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _ -> typ_float
174+
| Cnegf Float64 | Cabsf Float64 | Caddf Float64
175+
| Csubf Float64 | Cmulf Float64 | Cdivf Float64 -> typ_float
176+
| Cnegf Float32 | Cabsf Float32 | Caddf Float32
177+
| Csubf Float32 | Cmulf Float32 | Cdivf Float32 -> typ_float32
175178
| Ccsel ty -> ty
176179
| Cvalueofint -> typ_val
177180
| Cintofvalue -> typ_int

0 commit comments

Comments
 (0)