Skip to content

Commit db58a68

Browse files
authored
Arrays of unboxed float32s (#2554)
1 parent 2ad4c0f commit db58a68

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+876
-244
lines changed

backend/amd64/emit.mlp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,8 @@ let emit_simd_instr op i =
10461046
| SSE High_64_to_low_64 -> I.movhlps (arg i 1) (res i 0)
10471047
| SSE Low_64_to_high_64 -> I.movlhps (arg i 1) (res i 0)
10481048
| SSE Interleave_high_32 -> I.unpckhps (arg i 1) (res i 0)
1049-
| SSE Interleave_low_32 -> I.unpcklps (arg i 1) (res i 0)
1049+
| SSE (Interleave_low_32 | Interleave_low_32_regs) ->
1050+
I.unpcklps (arg i 1) (res i 0)
10501051
| SSE Movemask_32 -> I.movmskps (arg i 0) (res i 0)
10511052
| SSE (Shuffle_32 n) -> I.shufps (X86_dsl.int n) (arg i 1) (res i 0)
10521053
| SSE2 Max_scalar_f64 -> I.maxsd (arg i 1) (res i 0)
@@ -1568,6 +1569,8 @@ let emit_instr ~first ~fallthrough i =
15681569
instr_for_floatop width floatop (arg i 1) (res i 0)
15691570
| Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
15701571
move i.arg.(0) i.res.(0)
1572+
| Lop(Iscalarcast Float32_as_float) ->
1573+
I.movss (arg i 0) (res i 0)
15711574
| Lop(Iscalarcast (Float_of_int Float64)) ->
15721575
I.cvtsi2sd (arg i 0) (res i 0)
15731576
| Lop(Iscalarcast (Float_to_int Float64)) ->

backend/amd64/proc.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ let operation_supported = function
796796
| Cbswap _
797797
| Cclz _ | Cctz _
798798
| Ccmpi _ | Caddv | Cadda | Ccmpa _
799-
| Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _
799+
| Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _ | Cpackf32
800800
| Cvalueofint | Cintofvalue
801801
| Ccmpf _
802802
| Craise _

backend/amd64/regalloc_stack_operands.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
192192
May_still_have_spilled_registers
193193
| Op (Scalarcast (Float_of_int (Float32 | Float64) |
194194
Float_to_int (Float32 | Float64) |
195-
Float_of_float32 | Float_to_float32) |
195+
Float_of_float32 | Float_to_float32 |
196+
Float32_as_float) |
196197
Vectorcast _) ->
197198
may_use_stack_operand_for_only_argument map instr ~has_result:true
198199
| Op (Const_symbol _) ->

backend/amd64/reload.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ method! reload_operation op arg res =
160160
(arg', [|r|])
161161
| Iscalarcast (Float_of_int (Float32 | Float64) |
162162
Float_to_int (Float32 | Float64) |
163-
Float_of_float32 | Float_to_float32) ->
163+
Float_of_float32 | Float_to_float32 |
164+
Float32_as_float) ->
164165
(* Result must be in register, but argument can be on stack *)
165166
(arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
166167
| Iscalarcast (V128_to_scalar (Float64x2) | V128_of_scalar (Float64x2)) ->

backend/amd64/selection.ml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,12 @@ method! select_operation op args dbg =
298298
self#select_floatarith true width Imulf Ifloatmul args
299299
| Cdivf width ->
300300
self#select_floatarith false width Idivf Ifloatdiv args
301+
| Cpackf32 ->
302+
(* We must operate on registers. This is because if the second argument
303+
was a float stack slot, the resulting UNPCKLPS instruction would
304+
enforce the validity of loading it as a 128-bit memory location,
305+
even though it only loads 64 bits. *)
306+
Ispecific (Isimd (SSE Interleave_low_32_regs)), args
301307
(* Special cases overriding C implementations (regardless of [@@builtin]). *)
302308
| Cextcall { func = ("sqrt" as func); _ }
303309
| Cextcall { func = ("caml_int64_bits_of_float_unboxed" as func); _ }

backend/amd64/simd.ml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ type sse_operation =
8282
| Low_64_to_high_64
8383
| Interleave_high_32
8484
| Interleave_low_32
85+
| Interleave_low_32_regs
8586
| Movemask_32
8687
| Shuffle_32 of int
8788

@@ -305,14 +306,15 @@ let equal_operation_sse l r =
305306
| Low_64_to_high_64, Low_64_to_high_64
306307
| Interleave_high_32, Interleave_high_32
307308
| Interleave_low_32, Interleave_low_32
309+
| Interleave_low_32_regs, Interleave_low_32_regs
308310
| Movemask_32, Movemask_32 ->
309311
true
310312
| Cmp_f32 l, Cmp_f32 r when float_condition_equal l r -> true
311313
| Shuffle_32 l, Shuffle_32 r when Int.equal l r -> true
312314
| ( ( Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32 | Rcp_f32
313315
| Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64
314-
| Interleave_high_32 | Interleave_low_32 | Movemask_32 | Cmp_f32 _
315-
| Shuffle_32 _ ),
316+
| Interleave_high_32 | Interleave_low_32_regs | Interleave_low_32
317+
| Movemask_32 | Cmp_f32 _ | Shuffle_32 _ ),
316318
_ ) ->
317319
false
318320

@@ -637,6 +639,8 @@ let print_operation_sse printreg op ppf arg =
637639
fprintf ppf "interleave_high_32 %a %a" printreg arg.(0) printreg arg.(1)
638640
| Interleave_low_32 ->
639641
fprintf ppf "interleave_low_32 %a %a" printreg arg.(0) printreg arg.(1)
642+
| Interleave_low_32_regs ->
643+
fprintf ppf "interleave_low_32_regs %a %a" printreg arg.(0) printreg arg.(1)
640644

641645
let print_operation_sse2 printreg op ppf arg =
642646
match op with
@@ -922,7 +926,8 @@ let class_of_operation_bmi2 = function Deposit_64 | Extract_64 -> Pure
922926
let class_of_operation_sse = function
923927
| Cmp_f32 _ | Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32
924928
| Rcp_f32 | Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64
925-
| Interleave_high_32 | Interleave_low_32 | Movemask_32 | Shuffle_32 _ ->
929+
| Interleave_high_32 | Interleave_low_32 | Interleave_low_32_regs
930+
| Movemask_32 | Shuffle_32 _ ->
926931
Pure
927932

928933
let class_of_operation_sse2 = function

backend/amd64/simd_proc.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ let register_behavior_sse = function
4343
| Interleave_low_32 | Interleave_high_32 | Shuffle_32 _ ->
4444
R_RM_to_fst
4545
| Rcp_f32 | Sqrt_f32 | Rsqrt_f32 -> RM_to_R
46-
| High_64_to_low_64 | Low_64_to_high_64 -> R_R_to_fst
46+
| Interleave_low_32_regs | High_64_to_low_64 | Low_64_to_high_64 -> R_R_to_fst
4747
| Movemask_32 -> R_to_R
4848

4949
let register_behavior_sse2 = function

backend/arm64/emit.mlp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,8 @@ module BR = Branch_relaxation.Make (struct
554554
| Lop (Ivectorcast _) -> 1
555555
| Lop (Iscalarcast (Float_of_int Float64 | Float_to_int Float64)) -> 1
556556
| Lop (Iscalarcast (Float_of_int Float32 | Float_to_int Float32 |
557-
Float_of_float32 | Float_to_float32)) ->
557+
Float_of_float32 | Float_to_float32 |
558+
Float32_as_float)) ->
558559
(* CR mslater: (float32) arm64 *)
559560
Misc.fatal_error "float32 is not supported on this architecture"
560561
| Lop (Iscalarcast (V128_of_scalar _ | V128_to_scalar _)) ->
@@ -784,7 +785,8 @@ let emit_instr i =
784785
| Lop(Iscalarcast (Float_of_int Float64)) ->
785786
` scvtf {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
786787
| Lop (Iscalarcast (Float_of_int Float32 | Float_to_int Float32 |
787-
Float_of_float32 | Float_to_float32)) ->
788+
Float_of_float32 | Float_to_float32 |
789+
Float32_as_float)) ->
788790
(* CR mslater: (float32) arm64 *)
789791
Misc.fatal_error "float32 not supported on this architecture"
790792
| Lop(Iscalarcast (V128_of_scalar _ | V128_to_scalar _) | Ivectorcast _) ->

backend/arm64/proc.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,9 +489,11 @@ let operation_supported = function
489489
(* CR mslater: (float32) arm64 *)
490490
| Cnegf Float32 | Cabsf Float32 | Caddf Float32
491491
| Csubf Float32 | Cmulf Float32 | Cdivf Float32
492+
| Cpackf32
492493
| Cvectorcast _ | Cscalarcast (Float_of_float32 | Float_to_float32 |
493494
Float_to_int Float32 | Float_of_int Float32 |
494-
V128_of_scalar _ | V128_to_scalar _)
495+
V128_of_scalar _ | V128_to_scalar _ |
496+
Float32_as_float)
495497
-> false (* Not implemented *)
496498
| Cbswap _
497499
| Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _

backend/cfg/cfg.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ let dump_op ppf = function
287287
| Valueofint -> Format.fprintf ppf "valueofint"
288288
| Intofvalue -> Format.fprintf ppf "intofvalue"
289289
| Vectorcast Bits128 -> Format.fprintf ppf "vec128->vec128"
290+
| Scalarcast Float32_as_float -> Format.fprintf ppf "float32 as float"
290291
| Scalarcast (Float_of_int Float64) -> Format.fprintf ppf "int->float"
291292
| Scalarcast (Float_to_int Float64) -> Format.fprintf ppf "float->int"
292293
| Scalarcast (Float_of_int Float32) -> Format.fprintf ppf "int->float32"

backend/cmm.ml

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ type vector_cast =
207207
| Bits128
208208

209209
type scalar_cast =
210+
| Float32_as_float
210211
| Float_to_int of float_width
211212
| Float_of_int of float_width
212213
| Float_to_float32
@@ -248,6 +249,7 @@ type operation =
248249
| Cnegf of float_width | Cabsf of float_width
249250
| Caddf of float_width | Csubf of float_width
250251
| Cmulf of float_width | Cdivf of float_width
252+
| Cpackf32
251253
| Cvalueofint | Cintofvalue
252254
| Cvectorcast of vector_cast
253255
| Cscalarcast of scalar_cast
@@ -563,25 +565,17 @@ let equal_float_width left right =
563565

564566
let equal_scalar_cast left right =
565567
match left, right with
568+
| Float32_as_float, Float32_as_float -> true
566569
| Float_to_float32, Float_to_float32 -> true
567570
| Float_of_float32, Float_of_float32 -> true
568571
| Float_to_int f1, Float_to_int f2 -> equal_float_width f1 f2
569572
| Float_of_int f1, Float_of_int f2 -> equal_float_width f1 f2
570573
| V128_to_scalar v1, V128_to_scalar v2 -> Primitive.equal_vec128_type v1 v2
571574
| V128_of_scalar v1, V128_of_scalar v2 -> Primitive.equal_vec128_type v1 v2
572-
| Float_to_float32, (Float_of_float32 | Float_to_int _ | Float_of_int _ |
573-
V128_to_scalar _ | V128_of_scalar _)
574-
| Float_of_float32, (Float_to_float32 | Float_to_int _ | Float_of_int _ |
575-
V128_to_scalar _ | V128_of_scalar _)
576-
| Float_to_int _, (Float_of_float32 | Float_to_float32 | Float_of_int _ |
577-
V128_to_scalar _ | V128_of_scalar _)
578-
| Float_of_int _, (Float_of_float32 | Float_to_float32 | Float_to_int _ |
579-
V128_to_scalar _ | V128_of_scalar _)
580-
| V128_to_scalar _, (Float_of_float32 | Float_to_float32 | Float_to_int _ |
581-
Float_of_int _ | V128_of_scalar _)
582-
| V128_of_scalar _, (Float_of_float32 | Float_to_float32 | Float_to_int _ |
583-
Float_of_int _ | V128_to_scalar _)
584-
-> false
575+
| (Float32_as_float |
576+
Float_to_float32 | Float_of_float32 |
577+
Float_to_int _ | Float_of_int _ |
578+
V128_to_scalar _ | V128_of_scalar _), _ -> false
585579

586580
let equal_float_comparison left right =
587581
match left, right with

backend/cmm.mli

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ type vector_cast =
182182
| Bits128
183183

184184
type scalar_cast =
185+
(* CR mslater: move all bit-casts into a reinterpret_cast type *)
186+
| Float32_as_float
185187
| Float_to_int of float_width
186188
| Float_of_int of float_width
187189
| Float_to_float32
@@ -228,6 +230,7 @@ type operation =
228230
| Cnegf of float_width | Cabsf of float_width
229231
| Caddf of float_width | Csubf of float_width
230232
| Cmulf of float_width | Cdivf of float_width
233+
| Cpackf32
231234
| Cvalueofint | Cintofvalue
232235
| Cvectorcast of vector_cast
233236
| Cscalarcast of scalar_cast

0 commit comments

Comments
 (0)