Skip to content

Commit 9c308a3

Browse files
authored
float32 backend operations (#2385)
1 parent c52d40b commit 9c308a3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+1340
-392
lines changed

backend/amd64/arch.ml

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,18 @@ type prefetch_info = {
139139

140140
type bswap_bitwidth = Sixteen | Thirtytwo | Sixtyfour
141141

142+
type float_width = Cmm.float_width
143+
142144
type specific_operation =
143-
Ilea of addressing_mode (* "lea" gives scaled adds *)
145+
Ilea of addressing_mode (* "lea" gives scaled adds *)
144146
| Istore_int of nativeint * addressing_mode * bool
145-
(* Store an integer constant *)
146-
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
147-
| Ifloatarithmem of float_operation * addressing_mode
147+
(* Store an integer constant *)
148+
| Ioffset_loc of int * addressing_mode
149+
(* Add a constant to a location *)
150+
| Ifloatarithmem of float_width * float_operation * addressing_mode
148151
(* Float arith operation with memory *)
149-
| Ifloatsqrtf of addressing_mode (* Float square root from memory *)
152+
| Ifloatsqrtf of float_width * addressing_mode
153+
(* Float square root from memory *)
150154
| Ibswap of { bitwidth: bswap_bitwidth; } (* endianness conversion *)
151155
| Isextend32 (* 32 to 64 bit conversion with sign
152156
extension *)
@@ -166,7 +170,10 @@ type specific_operation =
166170
}
167171

168172
and float_operation =
169-
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
173+
| Ifloatadd
174+
| Ifloatsub
175+
| Ifloatmul
176+
| Ifloatdiv
170177

171178
(* Sizes, endianness *)
172179

@@ -244,16 +251,23 @@ let print_specific_operation printreg op ppf arg =
244251
(if is_assign then "(assign)" else "(init)")
245252
| Ioffset_loc(n, addr) ->
246253
fprintf ppf "[%a] +:= %i" (print_addressing printreg addr) arg n
247-
| Ifloatsqrtf addr ->
254+
| Ifloatsqrtf (Float64, addr) ->
248255
fprintf ppf "sqrtf float64[%a]"
249256
(print_addressing printreg addr) [|arg.(0)|]
250-
| Ifloatarithmem(op, addr) ->
251-
let op_name = function
252-
| Ifloatadd -> "+f"
253-
| Ifloatsub -> "-f"
254-
| Ifloatmul -> "*f"
255-
| Ifloatdiv -> "/f" in
256-
fprintf ppf "%a %s float64[%a]" printreg arg.(0) (op_name op)
257+
| Ifloatsqrtf (Float32, addr) ->
258+
fprintf ppf "sqrtf float32[%a]"
259+
(print_addressing printreg addr) [|arg.(0)|]
260+
| Ifloatarithmem(width, op, addr) ->
261+
let op_name = match width, op with
262+
| Float64, Ifloatadd -> "+f"
263+
| Float64, Ifloatsub -> "-f"
264+
| Float64, Ifloatmul -> "*f"
265+
| Float64, Ifloatdiv -> "/f"
266+
| Float32, Ifloatadd -> "+f32"
267+
| Float32, Ifloatsub -> "-f32"
268+
| Float32, Ifloatmul -> "*f32"
269+
| Float32, Ifloatdiv -> "/f32" in
270+
(* Label the memory operand with the operation's actual width: the label was
   hard-coded to "float64" and therefore wrong for Float32 operations, unlike
   the Ifloatsqrtf cases above which already print float64/float32 by width. *)
fprintf ppf "%a %s %s[%a]" printreg arg.(0) op_name (match width with Float64 -> "float64" | Float32 -> "float32")
257271
(print_addressing printreg addr)
258272
(Array.sub arg 1 (Array.length arg - 1))
259273
| Ibswap { bitwidth } ->
@@ -360,9 +374,9 @@ let equal_prefetch_temporal_locality_hint left right =
360374

361375
let equal_float_operation left right =
362376
match left, right with
363-
| Ifloatadd, Ifloatadd -> true
364-
| Ifloatsub, Ifloatsub -> true
365-
| Ifloatmul, Ifloatmul -> true
377+
| Ifloatadd, Ifloatadd
378+
| Ifloatsub, Ifloatsub
379+
| Ifloatmul, Ifloatmul
366380
| Ifloatdiv, Ifloatdiv -> true
367381
| (Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv), _ -> false
368382

@@ -373,11 +387,14 @@ let equal_specific_operation left right =
373387
Nativeint.equal x y && equal_addressing_mode x' y' && Bool.equal x'' y''
374388
| Ioffset_loc (x, x'), Ioffset_loc (y, y') ->
375389
Int.equal x y && equal_addressing_mode x' y'
376-
| Ifloatarithmem (x, x'), Ifloatarithmem (y, y') ->
377-
equal_float_operation x y && equal_addressing_mode x' y'
390+
| Ifloatarithmem (xw, x, x'), Ifloatarithmem (yw, y, y') ->
391+
Cmm.equal_float_width xw yw &&
392+
equal_float_operation x y &&
393+
equal_addressing_mode x' y'
378394
| Ibswap { bitwidth = left }, Ibswap { bitwidth = right } ->
379395
Int.equal (int_of_bswap_bitwidth left) (int_of_bswap_bitwidth right)
380-
| Ifloatsqrtf left, Ifloatsqrtf right ->
396+
| Ifloatsqrtf (left_w, left), Ifloatsqrtf (right_w, right) ->
397+
Cmm.equal_float_width left_w right_w &&
381398
equal_addressing_mode left right
382399
| Isextend32, Isextend32 ->
383400
true

backend/amd64/arch.mli

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,17 @@ type prefetch_info = {
6868

6969
type bswap_bitwidth = Sixteen | Thirtytwo | Sixtyfour
7070

71+
type float_width = Cmm.float_width
72+
7173
type specific_operation =
7274
Ilea of addressing_mode (* "lea" gives scaled adds *)
7375
| Istore_int of nativeint * addressing_mode * bool
7476
(* Store an integer constant *)
7577
| Ioffset_loc of int * addressing_mode (* Add a constant to a location *)
76-
| Ifloatarithmem of float_operation * addressing_mode
78+
| Ifloatarithmem of float_width * float_operation * addressing_mode
7779
(* Float arith operation with memory *)
78-
| Ifloatsqrtf of addressing_mode (* Float square root from memory *)
80+
| Ifloatsqrtf of float_width * addressing_mode
81+
(* Float square root from memory *)
7982
| Ibswap of { bitwidth: bswap_bitwidth; } (* endianness conversion *)
8083
| Isextend32 (* 32 to 64 bit conversion with sign
8184
extension *)
@@ -95,7 +98,10 @@ type specific_operation =
9598
}
9699

97100
and float_operation =
98-
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
101+
| Ifloatadd
102+
| Ifloatsub
103+
| Ifloatmul
104+
| Ifloatdiv
99105

100106
val equal_specific_operation : specific_operation -> specific_operation -> bool
101107

backend/amd64/emit.mlp

Lines changed: 74 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -574,18 +574,28 @@ let instr_for_intop = function
574574
| Iasr -> I.sar
575575
| _ -> assert false
576576

577-
let instr_for_floatop = function
578-
| Iaddf -> I.addsd
579-
| Isubf -> I.subsd
580-
| Imulf -> I.mulsd
581-
| Idivf -> I.divsd
577+
let instr_for_floatop width op =
578+
match width, op with
579+
| Float64, Iaddf -> I.addsd
580+
| Float64, Isubf -> I.subsd
581+
| Float64, Imulf -> I.mulsd
582+
| Float64, Idivf -> I.divsd
583+
| Float32, Iaddf -> I.addss
584+
| Float32, Isubf -> I.subss
585+
| Float32, Imulf -> I.mulss
586+
| Float32, Idivf -> I.divss
582587
| _ -> assert false
583588

584-
let instr_for_floatarithmem = function
585-
| Ifloatadd -> I.addsd
586-
| Ifloatsub -> I.subsd
587-
| Ifloatmul -> I.mulsd
588-
| Ifloatdiv -> I.divsd
589+
let instr_for_floatarithmem width op =
590+
match width, op with
591+
| Float64, Ifloatadd -> I.addsd
592+
| Float64, Ifloatsub -> I.subsd
593+
| Float64, Ifloatmul -> I.mulsd
594+
| Float64, Ifloatdiv -> I.divsd
595+
| Float32, Ifloatadd -> I.addss
596+
| Float32, Ifloatsub -> I.subss
597+
| Float32, Ifloatmul -> I.mulss
598+
| Float32, Ifloatdiv -> I.divss
589599

590600
let cond = function
591601
| Isigned Ceq -> E | Isigned Cne -> NE
@@ -604,7 +614,8 @@ let output_test_zero arg =
604614

605615
(* Output a floating-point compare and branch *)
606616

607-
let emit_float_test cmp i ~(taken:X86_ast.condition -> unit) =
617+
let emit_float_test (width : Cmm.float_width)
618+
cmp i ~(taken:X86_ast.condition -> unit) =
608619
(* Effect of comisd on flags and conditional branches (the Float32 forms comiss/ucomiss are selected by [width] below and tested with the same conditions):
609620
ZF PF CF cond. branches taken
610621
unordered 1 1 1 je, jb, jbe, jp
@@ -614,46 +625,51 @@ let emit_float_test cmp i ~(taken:X86_ast.condition -> unit) =
614625
If FP traps are on (they are off by default),
615626
comisd traps on QNaN and SNaN but ucomisd traps on SNaN only.
616627
*)
628+
let ucomi, comi =
629+
match width with
630+
| Float64 -> I.ucomisd, I.comisd
631+
| Float32 -> I.ucomiss, I.comiss
632+
in
617633
match cmp with
618634
| CFeq when arg i 1 = arg i 0 ->
619-
I.ucomisd (arg i 1) (arg i 0);
635+
ucomi (arg i 1) (arg i 0);
620636
taken NP
621637
| CFeq ->
622638
let next = new_label() in
623-
I.ucomisd (arg i 1) (arg i 0);
639+
ucomi (arg i 1) (arg i 0);
624640
I.jp (label next); (* skip if unordered *)
625641
taken E; (* branch taken if x=y *)
626642
def_label next
627643
| CFneq when arg i 1 = arg i 0 ->
628-
I.ucomisd (arg i 1) (arg i 0);
644+
ucomi (arg i 1) (arg i 0);
629645
taken P
630646
| CFneq ->
631-
I.ucomisd (arg i 1) (arg i 0);
647+
ucomi (arg i 1) (arg i 0);
632648
taken P; (* branch taken if unordered *)
633649
taken NE (* branch taken if x<y or x>y *)
634650
| CFlt ->
635-
I.comisd (arg i 0) (arg i 1);
651+
comi (arg i 0) (arg i 1);
636652
taken A (* branch taken if y>x i.e. x<y *)
637653
| CFnlt ->
638-
I.comisd (arg i 0) (arg i 1);
654+
comi (arg i 0) (arg i 1);
639655
taken BE (* taken if unordered or y<=x i.e. !(x<y) *)
640656
| CFle ->
641-
I.comisd (arg i 0) (arg i 1);(* swap compare *)
657+
comi (arg i 0) (arg i 1); (* swap compare *)
642658
taken AE (* branch taken if y>=x i.e. x<=y *)
643659
| CFnle ->
644-
I.comisd (arg i 0) (arg i 1);(* swap compare *)
660+
comi (arg i 0) (arg i 1); (* swap compare *)
645661
taken B (* taken if unordered or y<x i.e. !(x<=y) *)
646662
| CFgt ->
647-
I.comisd (arg i 1) (arg i 0);
663+
comi (arg i 1) (arg i 0);
648664
taken A (* branch taken if x>y *)
649665
| CFngt ->
650-
I.comisd (arg i 1) (arg i 0);
666+
comi (arg i 1) (arg i 0);
651667
taken BE (* taken if unordered or x<=y i.e. !(x>y) *)
652668
| CFge ->
653-
I.comisd (arg i 1) (arg i 0);(* swap compare *)
669+
comi (arg i 1) (arg i 0); (* swap compare *)
654670
taken AE (* branch taken if x>=y *)
655671
| CFnge ->
656-
I.comisd (arg i 1) (arg i 0);(* swap compare *)
672+
comi (arg i 1) (arg i 0); (* swap compare *)
657673
taken B (* taken if unordered or x<y i.e. !(x>=y) *)
658674

659675
let emit_test i ~(taken:X86_ast.condition -> unit) = function
@@ -673,8 +689,8 @@ let emit_test i ~(taken:X86_ast.condition -> unit) = function
673689
| Iinttest_imm(cmp, n) ->
674690
I.cmp (int n) (arg i 0);
675691
taken (cond cmp)
676-
| Ifloattest cmp ->
677-
emit_float_test cmp i ~taken
692+
| Ifloattest (width, cmp) ->
693+
emit_float_test width cmp i ~taken
678694
| Ioddtest ->
679695
I.test (int 1) (arg8 i 0);
680696
taken NE
@@ -1517,18 +1533,31 @@ let emit_instr ~first ~fallthrough i =
15171533
instr_for_intop op (int n) (res i 0)
15181534
| Lop(Iintop_atomic{op; size; addr}) ->
15191535
emit_atomic i op size addr
1520-
| Lop(Ifloatop(Icompf cmp)) ->
1536+
| Lop(Ifloatop(Float64, Icompf cmp)) ->
15211537
let cond, need_swap = float_cond_and_need_swap cmp in
15221538
let a0, a1 = if need_swap then arg i 1, arg i 0 else arg i 0, arg i 1 in
15231539
I.cmpsd cond a1 a0;
15241540
I.movq a0 (res i 0);
15251541
I.neg (res i 0)
1526-
| Lop(Ifloatop(Inegf)) ->
1542+
| Lop(Ifloatop(Float32, Icompf cmp)) ->
1543+
let cond, need_swap = float_cond_and_need_swap cmp in
1544+
let a0, a1 = if need_swap then arg i 1, arg i 0 else arg i 0, arg i 1 in
1545+
I.cmpss cond a1 a0;
1546+
I.movd a0 (res32 i 0);
1547+
(* CMPSS only sets the bottom 32 bits of the result, so we sign-extend to
1548+
copy the result to the top 32 bits. *)
1549+
I.movsxd (res32 i 0) (res i 0);
1550+
I.neg (res i 0)
1551+
| Lop(Ifloatop(Float64, Inegf)) ->
15271552
I.xorpd (mem64_rip VEC128 (emit_symbol "caml_negf_mask")) (res i 0)
1528-
| Lop(Ifloatop(Iabsf)) ->
1553+
| Lop(Ifloatop(Float64, Iabsf)) ->
15291554
I.andpd (mem64_rip VEC128 (emit_symbol "caml_absf_mask")) (res i 0)
1530-
| Lop(Ifloatop(Iaddf | Isubf | Imulf | Idivf as floatop)) ->
1531-
instr_for_floatop floatop (arg i 1) (res i 0)
1555+
| Lop(Ifloatop(Float32, Inegf)) ->
1556+
I.xorps (mem64_rip VEC128 (emit_symbol "caml_negf32_mask")) (res i 0)
1557+
| Lop(Ifloatop(Float32, Iabsf)) ->
1558+
I.andps (mem64_rip VEC128 (emit_symbol "caml_absf32_mask")) (res i 0)
1559+
| Lop(Ifloatop(width, (Iaddf | Isubf | Imulf | Idivf as floatop))) ->
1560+
instr_for_floatop width floatop (arg i 1) (res i 0)
15321561
| Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
15331562
move i.arg.(0) i.res.(0)
15341563
| Lop(Iscalarcast (Float_of_int Float64)) ->
@@ -1583,18 +1612,23 @@ let emit_instr ~first ~fallthrough i =
15831612
I.mov (nat n) (addressing addr QWORD i 0)
15841613
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
15851614
I.add (int n) (addressing addr QWORD i 0)
1586-
| Lop(Ispecific(Ifloatarithmem(op, addr))) ->
1587-
instr_for_floatarithmem op (addressing addr REAL8 i 1) (res i 0)
1615+
| Lop(Ispecific(Ifloatarithmem(Float64, op, addr))) ->
1616+
instr_for_floatarithmem Float64 op (addressing addr REAL8 i 1) (res i 0)
1617+
| Lop(Ispecific(Ifloatarithmem(Float32, op, addr))) ->
1618+
instr_for_floatarithmem Float32 op (addressing addr REAL4 i 1) (res i 0)
15881619
| Lop(Ispecific(Ibswap { bitwidth = Sixteen })) ->
15891620
I.xchg ah al;
15901621
I.movzx (res16 i 0) (res i 0)
15911622
| Lop(Ispecific(Ibswap { bitwidth = Thirtytwo })) ->
15921623
I.bswap (res32 i 0);
15931624
| Lop(Ispecific(Ibswap { bitwidth = Sixtyfour })) ->
15941625
I.bswap (res i 0)
1595-
| Lop(Ispecific(Ifloatsqrtf addr)) ->
1626+
| Lop(Ispecific(Ifloatsqrtf (Float64, addr))) ->
15961627
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
15971628
I.sqrtsd (addressing addr REAL8 i 0) (res i 0)
1629+
| Lop(Ispecific(Ifloatsqrtf (Float32, _addr))) ->
1630+
(* CR mslater: (float32) Ifloatsqrtf Float32 *)
1631+
Misc.fatal_error "Ifloatsqrtf Float32 should never be generated."
15981632
| Lop(Ispecific(Isextend32)) ->
15991633
I.movsxd (arg32 i 0) (res i 0)
16001634
| Lop(Ispecific(Izextend32)) ->
@@ -2016,6 +2050,13 @@ let begin_assembly unix =
20162050
_label (emit_symbol "caml_absf_mask");
20172051
D.qword (Const 0x7FFFFFFFFFFFFFFFL);
20182052
D.qword (Const 0xFFFFFFFFFFFFFFFFL);
2053+
_label (emit_symbol "caml_negf32_mask");
2054+
D.qword (Const 0x80000000L);
2055+
D.qword (Const 0L);
2056+
D.align ~data:true 16;
2057+
_label (emit_symbol "caml_absf32_mask");
2058+
D.qword (Const 0xFFFFFFFF7FFFFFFFL);
2059+
D.qword (Const 0xFFFFFFFFFFFFFFFFL);
20192060
end;
20202061

20212062
D.data ();

backend/amd64/proc.ml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ let destroyed_at_oper = function
488488
| Iop(Ispecific(Isextend32 | Izextend32 | Ilea _
489489
| Istore_int (_, _, _) | Ioffset_loc (_, _)
490490
| Ipause | Iprefetch _
491-
| Ifloatarithmem (_, _) | Ifloatsqrtf _ | Ibswap _))
491+
| Ifloatarithmem (_, _, _) | Ifloatsqrtf (_, _) | Ibswap _))
492492
| Iop(Iintop(Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
493493
| Ipopcnt | Iclz _ | Ictz _ ))
494494
| Iop(Iintop_imm((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl
@@ -662,7 +662,8 @@ let max_register_pressure =
662662
consumes ~int:(1 + num_destroyed_by_plt_stub) ~float:0
663663
| Iintop(Icomp _) | Iintop_imm((Icomp _), _) ->
664664
consumes ~int:1 ~float:0
665-
| Istore(Single { reg = Float64 }, _, _) | Ifloatop (Icompf _) ->
665+
| Istore(Single { reg = Float64 }, _, _)
666+
| Ifloatop ((Float64 | Float32), Icompf _) ->
666667
consumes ~int:0 ~float:1
667668
| Ispecific(Isimd op) ->
668669
(match Simd_proc.register_behavior op with
@@ -688,7 +689,8 @@ let max_register_pressure =
688689
| Single { reg = Float32 } | Double
689690
| Onetwentyeight_aligned | Onetwentyeight_unaligned),
690691
_, _)
691-
| Imove | Ispill | Ireload | Ifloatop (Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf)
692+
| Imove | Ispill | Ireload
693+
| Ifloatop ((Float64 | Float32), (Inegf | Iabsf | Iaddf | Isubf | Imulf | Idivf))
692694
| Icsel _
693695
| Ivalueofint | Iintofvalue | Ivectorcast _ | Iscalarcast _
694696
| Iconst_int _ | Iconst_float _ | Iconst_float32 _
@@ -698,7 +700,8 @@ let max_register_pressure =
698700
| Ispecific(Ilea _ | Isextend32 | Izextend32 | Iprefetch _ | Ipause
699701
| Irdtsc | Irdpmc | Istore_int (_, _, _)
700702
| Ilfence | Isfence | Imfence
701-
| Ioffset_loc (_, _) | Ifloatarithmem (_, _) | Ifloatsqrtf _
703+
| Ioffset_loc (_, _) | Ifloatarithmem (_, _, _)
704+
| Ifloatsqrtf (_, _)
702705
| Ibswap _)
703706
| Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque
704707
| Ibeginregion | Iendregion | Idls_get
@@ -794,7 +797,7 @@ let operation_supported = function
794797
| Cbswap _
795798
| Cclz _ | Cctz _
796799
| Ccmpi _ | Caddv | Cadda | Ccmpa _
797-
| Cnegf | Cabsf | Caddf | Csubf | Cmulf | Cdivf
800+
| Cnegf _ | Cabsf _ | Caddf _ | Csubf _ | Cmulf _ | Cdivf _
798801
| Cvalueofint | Cintofvalue
799802
| Ccmpf _
800803
| Craise _

0 commit comments

Comments
 (0)