Skip to content

Arrays of unboxed float32s #2554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion backend/amd64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,8 @@ let emit_simd_instr op i =
| SSE High_64_to_low_64 -> I.movhlps (arg i 1) (res i 0)
| SSE Low_64_to_high_64 -> I.movlhps (arg i 1) (res i 0)
| SSE Interleave_high_32 -> I.unpckhps (arg i 1) (res i 0)
| SSE Interleave_low_32 -> I.unpcklps (arg i 1) (res i 0)
| SSE (Interleave_low_32 | Interleave_low_32_regs) ->
I.unpcklps (arg i 1) (res i 0)
| SSE Movemask_32 -> I.movmskps (arg i 0) (res i 0)
| SSE (Shuffle_32 n) -> I.shufps (X86_dsl.int n) (arg i 1) (res i 0)
| SSE2 Max_scalar_f64 -> I.maxsd (arg i 1) (res i 0)
Expand Down Expand Up @@ -1568,6 +1569,8 @@ let emit_instr ~first ~fallthrough i =
instr_for_floatop width floatop (arg i 1) (res i 0)
| Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
move i.arg.(0) i.res.(0)
| Lop(Iscalarcast Float32_as_float) ->
I.movss (arg i 0) (res i 0)
| Lop(Iscalarcast (Float_of_int Float64)) ->
I.cvtsi2sd (arg i 0) (res i 0)
| Lop(Iscalarcast (Float_to_int Float64)) ->
Expand Down
2 changes: 1 addition & 1 deletion backend/amd64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ let operation_supported = function
| Cbswap _
| Cclz _ | Cctz _
| Ccmpi _ | Caddv | Cadda | Ccmpa _
| Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _
| Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _ | Cpackf32
| Cvalueofint | Cintofvalue
| Ccmpf _
| Craise _
Expand Down
3 changes: 2 additions & 1 deletion backend/amd64/regalloc_stack_operands.ml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ let basic (map : spilled_map) (instr : Cfg.basic Cfg.instruction) =
May_still_have_spilled_registers
| Op (Scalarcast (Float_of_int (Float32 | Float64) |
Float_to_int (Float32 | Float64) |
Float_of_float32 | Float_to_float32) |
Float_of_float32 | Float_to_float32 |
Float32_as_float) |
Vectorcast _) ->
may_use_stack_operand_for_only_argument map instr ~has_result:true
| Op (Const_symbol _) ->
Expand Down
3 changes: 2 additions & 1 deletion backend/amd64/reload.ml
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ method! reload_operation op arg res =
(arg', [|r|])
| Iscalarcast (Float_of_int (Float32 | Float64) |
Float_to_int (Float32 | Float64) |
Float_of_float32 | Float_to_float32) ->
Float_of_float32 | Float_to_float32 |
Float32_as_float) ->
(* Result must be in register, but argument can be on stack *)
(arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
| Iscalarcast (V128_to_scalar (Float64x2) | V128_of_scalar (Float64x2)) ->
Expand Down
6 changes: 6 additions & 0 deletions backend/amd64/selection.ml
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,12 @@ method! select_operation op args dbg =
self#select_floatarith true width Imulf Ifloatmul args
| Cdivf width ->
self#select_floatarith false width Idivf Ifloatdiv args
| Cpackf32 ->
(* We must operate on registers. This is because if the second argument
was a float stack slot, the resulting UNPCKLPS instruction would
enforce the validity of loading it as a 128-bit memory location,
even though it only loads 64 bits. *)
Ispecific (Isimd (SSE Interleave_low_32_regs)), args
(* Special cases overriding C implementations (regardless of [@@builtin]). *)
| Cextcall { func = ("sqrt" as func); _ }
| Cextcall { func = ("caml_int64_bits_of_float_unboxed" as func); _ }
Expand Down
11 changes: 8 additions & 3 deletions backend/amd64/simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type sse_operation =
| Low_64_to_high_64
| Interleave_high_32
| Interleave_low_32
| Interleave_low_32_regs
| Movemask_32
| Shuffle_32 of int

Expand Down Expand Up @@ -305,14 +306,15 @@ let equal_operation_sse l r =
| Low_64_to_high_64, Low_64_to_high_64
| Interleave_high_32, Interleave_high_32
| Interleave_low_32, Interleave_low_32
| Interleave_low_32_regs, Interleave_low_32_regs
| Movemask_32, Movemask_32 ->
true
| Cmp_f32 l, Cmp_f32 r when float_condition_equal l r -> true
| Shuffle_32 l, Shuffle_32 r when Int.equal l r -> true
| ( ( Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32 | Rcp_f32
| Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64
| Interleave_high_32 | Interleave_low_32 | Movemask_32 | Cmp_f32 _
| Shuffle_32 _ ),
| Interleave_high_32 | Interleave_low_32_regs | Interleave_low_32
| Movemask_32 | Cmp_f32 _ | Shuffle_32 _ ),
_ ) ->
false

Expand Down Expand Up @@ -637,6 +639,8 @@ let print_operation_sse printreg op ppf arg =
fprintf ppf "interleave_high_32 %a %a" printreg arg.(0) printreg arg.(1)
| Interleave_low_32 ->
fprintf ppf "interleave_low_32 %a %a" printreg arg.(0) printreg arg.(1)
| Interleave_low_32_regs ->
fprintf ppf "interleave_low_32_regs %a %a" printreg arg.(0) printreg arg.(1)

let print_operation_sse2 printreg op ppf arg =
match op with
Expand Down Expand Up @@ -922,7 +926,8 @@ let class_of_operation_bmi2 = function Deposit_64 | Extract_64 -> Pure
(* Classifies an SSE operation. Every constructor listed here, including the
   register-only [Interleave_low_32_regs] variant, is classified as [Pure].
   Constructors are enumerated explicitly (no wildcard) so that a newly added
   SSE operation must be classified deliberately. *)
let class_of_operation_sse = function
  | Cmp_f32 _ | Add_f32 | Sub_f32 | Mul_f32 | Div_f32 | Max_f32 | Min_f32
  | Rcp_f32 | Sqrt_f32 | Rsqrt_f32 | High_64_to_low_64 | Low_64_to_high_64
  | Interleave_high_32 | Interleave_low_32 | Interleave_low_32_regs
  | Movemask_32 | Shuffle_32 _ ->
    Pure

let class_of_operation_sse2 = function
Expand Down
2 changes: 1 addition & 1 deletion backend/amd64/simd_proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ let register_behavior_sse = function
| Interleave_low_32 | Interleave_high_32 | Shuffle_32 _ ->
R_RM_to_fst
| Rcp_f32 | Sqrt_f32 | Rsqrt_f32 -> RM_to_R
| High_64_to_low_64 | Low_64_to_high_64 -> R_R_to_fst
| Interleave_low_32_regs | High_64_to_low_64 | Low_64_to_high_64 -> R_R_to_fst
| Movemask_32 -> R_to_R

let register_behavior_sse2 = function
Expand Down
6 changes: 4 additions & 2 deletions backend/arm64/emit.mlp
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,8 @@ module BR = Branch_relaxation.Make (struct
| Lop (Ivectorcast _) -> 1
| Lop (Iscalarcast (Float_of_int Float64 | Float_to_int Float64)) -> 1
| Lop (Iscalarcast (Float_of_int Float32 | Float_to_int Float32 |
Float_of_float32 | Float_to_float32)) ->
Float_of_float32 | Float_to_float32 |
Float32_as_float)) ->
(* CR mslater: (float32) arm64 *)
Misc.fatal_error "float32 is not supported on this architecture"
| Lop (Iscalarcast (V128_of_scalar _ | V128_to_scalar _)) ->
Expand Down Expand Up @@ -784,7 +785,8 @@ let emit_instr i =
| Lop(Iscalarcast (Float_of_int Float64)) ->
` scvtf {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
| Lop (Iscalarcast (Float_of_int Float32 | Float_to_int Float32 |
Float_of_float32 | Float_to_float32)) ->
Float_of_float32 | Float_to_float32 |
Float32_as_float)) ->
(* CR mslater: (float32) arm64 *)
Misc.fatal_error "float32 not supported on this architecture"
| Lop(Iscalarcast (V128_of_scalar _ | V128_to_scalar _) | Ivectorcast _) ->
Expand Down
4 changes: 3 additions & 1 deletion backend/arm64/proc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -489,9 +489,11 @@ let operation_supported = function
(* CR mslater: (float32) arm64 *)
| Cnegf Float32 | Cabsf Float32 | Caddf Float32
| Csubf Float32 | Cmulf Float32 | Cdivf Float32
| Cpackf32
| Cvectorcast _ | Cscalarcast (Float_of_float32 | Float_to_float32 |
Float_to_int Float32 | Float_of_int Float32 |
V128_of_scalar _ | V128_to_scalar _)
V128_of_scalar _ | V128_to_scalar _ |
Float32_as_float)
-> false (* Not implemented *)
| Cbswap _
| Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _
Expand Down
1 change: 1 addition & 0 deletions backend/cfg/cfg.ml
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ let dump_op ppf = function
| Valueofint -> Format.fprintf ppf "valueofint"
| Intofvalue -> Format.fprintf ppf "intofvalue"
| Vectorcast Bits128 -> Format.fprintf ppf "vec128->vec128"
| Scalarcast Float32_as_float -> Format.fprintf ppf "float32 as float"
| Scalarcast (Float_of_int Float64) -> Format.fprintf ppf "int->float"
| Scalarcast (Float_to_int Float64) -> Format.fprintf ppf "float->int"
| Scalarcast (Float_of_int Float32) -> Format.fprintf ppf "int->float32"
Expand Down
20 changes: 7 additions & 13 deletions backend/cmm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ type vector_cast =
| Bits128

type scalar_cast =
| Float32_as_float
| Float_to_int of float_width
| Float_of_int of float_width
| Float_to_float32
Expand Down Expand Up @@ -248,6 +249,7 @@ type operation =
| Cnegf of float_width | Cabsf of float_width
| Caddf of float_width | Csubf of float_width
| Cmulf of float_width | Cdivf of float_width
| Cpackf32
| Cvalueofint | Cintofvalue
| Cvectorcast of vector_cast
| Cscalarcast of scalar_cast
Expand Down Expand Up @@ -563,25 +565,17 @@ let equal_float_width left right =

(* [equal_scalar_cast left right] is [true] iff both casts are built from the
   same constructor and, where the constructor carries a payload (a float
   width or a vec128 type), the payloads are equal. *)
let equal_scalar_cast left right =
  match left, right with
  | Float32_as_float, Float32_as_float -> true
  | Float_to_float32, Float_to_float32 -> true
  | Float_of_float32, Float_of_float32 -> true
  | Float_to_int f1, Float_to_int f2 -> equal_float_width f1 f2
  | Float_of_int f1, Float_of_int f2 -> equal_float_width f1 f2
  | V128_to_scalar v1, V128_to_scalar v2 -> Primitive.equal_vec128_type v1 v2
  | V128_of_scalar v1, V128_of_scalar v2 -> Primitive.equal_vec128_type v1 v2
  (* Mismatched constructors. The left-hand side lists every constructor
     explicitly instead of using [_, _], so adding a constructor to
     [scalar_cast] makes this match non-exhaustive rather than silently
     comparing as unequal. *)
  | (Float32_as_float |
     Float_to_float32 | Float_of_float32 |
     Float_to_int _ | Float_of_int _ |
     V128_to_scalar _ | V128_of_scalar _), _ -> false

let equal_float_comparison left right =
match left, right with
Expand Down
3 changes: 3 additions & 0 deletions backend/cmm.mli
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ type vector_cast =
| Bits128

type scalar_cast =
(* CR mslater: move all bit-casts into a reinterpret_cast type *)
| Float32_as_float
| Float_to_int of float_width
| Float_of_int of float_width
| Float_to_float32
Expand Down Expand Up @@ -228,6 +230,7 @@ type operation =
| Cnegf of float_width | Cabsf of float_width
| Caddf of float_width | Csubf of float_width
| Cmulf of float_width | Cdivf of float_width
| Cpackf32
| Cvalueofint | Cintofvalue
| Cvectorcast of vector_cast
| Cscalarcast of scalar_cast
Expand Down
Loading
Loading