@@ -574,18 +574,28 @@ let instr_for_intop = function
574
574
| Iasr -> I.sar
575
575
| _ -> assert false
576
576
577
- let instr_for_floatop = function
578
- | Iaddf -> I.addsd
579
- | Isubf -> I.subsd
580
- | Imulf -> I.mulsd
581
- | Idivf -> I.divsd
577
+ let instr_for_floatop width op =
578
+ match width, op with
579
+ | Float64, Iaddf -> I.addsd
580
+ | Float64, Isubf -> I.subsd
581
+ | Float64, Imulf -> I.mulsd
582
+ | Float64, Idivf -> I.divsd
583
+ | Float32, Iaddf -> I.addss
584
+ | Float32, Isubf -> I.subss
585
+ | Float32, Imulf -> I.mulss
586
+ | Float32, Idivf -> I.divss
582
587
| _ -> assert false
583
588
584
- let instr_for_floatarithmem = function
585
- | Ifloatadd -> I.addsd
586
- | Ifloatsub -> I.subsd
587
- | Ifloatmul -> I.mulsd
588
- | Ifloatdiv -> I.divsd
589
+ let instr_for_floatarithmem width op =
590
+ match width, op with
591
+ | Float64, Ifloatadd -> I.addsd
592
+ | Float64, Ifloatsub -> I.subsd
593
+ | Float64, Ifloatmul -> I.mulsd
594
+ | Float64, Ifloatdiv -> I.divsd
595
+ | Float32, Ifloatadd -> I.addss
596
+ | Float32, Ifloatsub -> I.subss
597
+ | Float32, Ifloatmul -> I.mulss
598
+ | Float32, Ifloatdiv -> I.divss
589
599
590
600
let cond = function
591
601
| Isigned Ceq -> E | Isigned Cne -> NE
@@ -604,7 +614,8 @@ let output_test_zero arg =
604
614
605
615
(* Output a floating-point compare and branch *)
606
616
607
- let emit_float_test cmp i ~(taken:X86_ast.condition -> unit) =
617
+ let emit_float_test (width : Cmm.float_width)
618
+ cmp i ~(taken:X86_ast.condition -> unit) =
608
619
(* Effect of comisd on flags and conditional branches:
609
620
ZF PF CF cond. branches taken
610
621
unordered 1 1 1 je, jb, jbe, jp
@@ -614,46 +625,51 @@ let emit_float_test cmp i ~(taken:X86_ast.condition -> unit) =
614
625
If FP traps are on (they are off by default),
615
626
comisd traps on QNaN and SNaN but ucomisd traps on SNaN only.
616
627
*)
628
+ let ucomi, comi =
629
+ match width with
630
+ | Float64 -> I.ucomisd, I.comisd
631
+ | Float32 -> I.ucomiss, I.comiss
632
+ in
617
633
match cmp with
618
634
| CFeq when arg i 1 = arg i 0 ->
619
- I.ucomisd (arg i 1) (arg i 0);
635
+ ucomi (arg i 1) (arg i 0);
620
636
taken NP
621
637
| CFeq ->
622
638
let next = new_label() in
623
- I.ucomisd (arg i 1) (arg i 0);
639
+ ucomi (arg i 1) (arg i 0);
624
640
I.jp (label next); (* skip if unordered *)
625
641
taken E; (* branch taken if x=y *)
626
642
def_label next
627
643
| CFneq when arg i 1 = arg i 0 ->
628
- I.ucomisd (arg i 1) (arg i 0);
644
+ ucomi (arg i 1) (arg i 0);
629
645
taken P
630
646
| CFneq ->
631
- I.ucomisd (arg i 1) (arg i 0);
647
+ ucomi (arg i 1) (arg i 0);
632
648
taken P; (* branch taken if unordered *)
633
649
taken NE (* branch taken if x<y or x>y *)
634
650
| CFlt ->
635
- I.comisd (arg i 0) (arg i 1);
651
+ comi (arg i 0) (arg i 1);
636
652
taken A (* branch taken if y>x i.e. x<y *)
637
653
| CFnlt ->
638
- I.comisd (arg i 0) (arg i 1);
654
+ comi (arg i 0) (arg i 1);
639
655
taken BE (* taken if unordered or y<=x i.e. !(x<y) *)
640
656
| CFle ->
641
- I.comisd (arg i 0) (arg i 1);(* swap compare *)
657
+ comi (arg i 0) (arg i 1); (* swap compare *)
642
658
taken AE (* branch taken if y>=x i.e. x<=y *)
643
659
| CFnle ->
644
- I.comisd (arg i 0) (arg i 1);(* swap compare *)
660
+ comi (arg i 0) (arg i 1); (* swap compare *)
645
661
taken B (* taken if unordered or y<x i.e. !(x<=y) *)
646
662
| CFgt ->
647
- I.comisd (arg i 1) (arg i 0);
663
+ comi (arg i 1) (arg i 0);
648
664
taken A (* branch taken if x>y *)
649
665
| CFngt ->
650
- I.comisd (arg i 1) (arg i 0);
666
+ comi (arg i 1) (arg i 0);
651
667
taken BE (* taken if unordered or x<=y i.e. !(x>y) *)
652
668
| CFge ->
653
- I.comisd (arg i 1) (arg i 0);(* swap compare *)
669
+ comi (arg i 1) (arg i 0); (* swap compare *)
654
670
taken AE (* branch taken if x>=y *)
655
671
| CFnge ->
656
- I.comisd (arg i 1) (arg i 0);(* swap compare *)
672
+ comi (arg i 1) (arg i 0); (* swap compare *)
657
673
taken B (* taken if unordered or x<y i.e. !(x>=y) *)
658
674
659
675
let emit_test i ~(taken:X86_ast.condition -> unit) = function
@@ -673,8 +689,8 @@ let emit_test i ~(taken:X86_ast.condition -> unit) = function
673
689
| Iinttest_imm(cmp, n) ->
674
690
I.cmp (int n) (arg i 0);
675
691
taken (cond cmp)
676
- | Ifloattest cmp ->
677
- emit_float_test cmp i ~taken
692
+ | Ifloattest (width, cmp) ->
693
+ emit_float_test width cmp i ~taken
678
694
| Ioddtest ->
679
695
I.test (int 1) (arg8 i 0);
680
696
taken NE
@@ -1517,18 +1533,31 @@ let emit_instr ~first ~fallthrough i =
1517
1533
instr_for_intop op (int n) (res i 0)
1518
1534
| Lop(Iintop_atomic{op; size; addr}) ->
1519
1535
emit_atomic i op size addr
1520
- | Lop(Ifloatop(Icompf cmp)) ->
1536
+ | Lop(Ifloatop(Float64, Icompf cmp)) ->
1521
1537
let cond, need_swap = float_cond_and_need_swap cmp in
1522
1538
let a0, a1 = if need_swap then arg i 1, arg i 0 else arg i 0, arg i 1 in
1523
1539
I.cmpsd cond a1 a0;
1524
1540
I.movq a0 (res i 0);
1525
1541
I.neg (res i 0)
1526
- | Lop(Ifloatop(Inegf)) ->
1542
+ | Lop(Ifloatop(Float32, Icompf cmp)) ->
1543
+ let cond, need_swap = float_cond_and_need_swap cmp in
1544
+ let a0, a1 = if need_swap then arg i 1, arg i 0 else arg i 0, arg i 1 in
1545
+ I.cmpss cond a1 a0;
1546
+ I.movd a0 (res32 i 0);
1547
+ (* CMPSS only sets the bottom 32 bits of the result, so we sign-extend to
1548
+ copy the result to the top 32 bits. *)
1549
+ I.movsxd (res32 i 0) (res i 0);
1550
+ I.neg (res i 0)
1551
+ | Lop(Ifloatop(Float64, Inegf)) ->
1527
1552
I.xorpd (mem64_rip VEC128 (emit_symbol "caml_negf_mask")) (res i 0)
1528
- | Lop(Ifloatop(Iabsf)) ->
1553
+ | Lop(Ifloatop(Float64, Iabsf)) ->
1529
1554
I.andpd (mem64_rip VEC128 (emit_symbol "caml_absf_mask")) (res i 0)
1530
- | Lop(Ifloatop(Iaddf | Isubf | Imulf | Idivf as floatop)) ->
1531
- instr_for_floatop floatop (arg i 1) (res i 0)
1555
+ | Lop(Ifloatop(Float32, Inegf)) ->
1556
+ I.xorps (mem64_rip VEC128 (emit_symbol "caml_negf32_mask")) (res i 0)
1557
+ | Lop(Ifloatop(Float32, Iabsf)) ->
1558
+ I.andps (mem64_rip VEC128 (emit_symbol "caml_absf32_mask")) (res i 0)
1559
+ | Lop(Ifloatop(width, (Iaddf | Isubf | Imulf | Idivf as floatop))) ->
1560
+ instr_for_floatop width floatop (arg i 1) (res i 0)
1532
1561
| Lop(Iintofvalue | Ivalueofint | Ivectorcast Bits128) ->
1533
1562
move i.arg.(0) i.res.(0)
1534
1563
| Lop(Iscalarcast (Float_of_int Float64)) ->
@@ -1583,18 +1612,23 @@ let emit_instr ~first ~fallthrough i =
1583
1612
I.mov (nat n) (addressing addr QWORD i 0)
1584
1613
| Lop(Ispecific(Ioffset_loc(n, addr))) ->
1585
1614
I.add (int n) (addressing addr QWORD i 0)
1586
- | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
1587
- instr_for_floatarithmem op (addressing addr REAL8 i 1) (res i 0)
1615
+ | Lop(Ispecific(Ifloatarithmem(Float64, op, addr))) ->
1616
+ instr_for_floatarithmem Float64 op (addressing addr REAL8 i 1) (res i 0)
1617
+ | Lop(Ispecific(Ifloatarithmem(Float32, op, addr))) ->
1618
+ instr_for_floatarithmem Float32 op (addressing addr REAL4 i 1) (res i 0)
1588
1619
| Lop(Ispecific(Ibswap { bitwidth = Sixteen })) ->
1589
1620
I.xchg ah al;
1590
1621
I.movzx (res16 i 0) (res i 0)
1591
1622
| Lop(Ispecific(Ibswap { bitwidth = Thirtytwo })) ->
1592
1623
I.bswap (res32 i 0);
1593
1624
| Lop(Ispecific(Ibswap { bitwidth = Sixtyfour })) ->
1594
1625
I.bswap (res i 0)
1595
- | Lop(Ispecific(Ifloatsqrtf addr)) ->
1626
+ | Lop(Ispecific(Ifloatsqrtf (Float64, addr) )) ->
1596
1627
I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
1597
1628
I.sqrtsd (addressing addr REAL8 i 0) (res i 0)
1629
+ | Lop(Ispecific(Ifloatsqrtf (Float32, _addr))) ->
1630
+ (* CR mslater: (float32) Ifloatsqrtf Float32 *)
1631
+ Misc.fatal_error "Ifloatsqrtf Float32 should never be generated."
1598
1632
| Lop(Ispecific(Isextend32)) ->
1599
1633
I.movsxd (arg32 i 0) (res i 0)
1600
1634
| Lop(Ispecific(Izextend32)) ->
@@ -2016,6 +2050,13 @@ let begin_assembly unix =
2016
2050
_label (emit_symbol "caml_absf_mask");
2017
2051
D.qword (Const 0x7FFFFFFFFFFFFFFFL);
2018
2052
D.qword (Const 0xFFFFFFFFFFFFFFFFL);
2053
+ _label (emit_symbol "caml_negf32_mask");
2054
+ D.qword (Const 0x80000000L);
2055
+ D.qword (Const 0L);
2056
+ D.align ~data:true 16;
2057
+ _label (emit_symbol "caml_absf32_mask");
2058
+ D.qword (Const 0xFFFFFFFF7FFFFFFFL);
2059
+ D.qword (Const 0xFFFFFFFFFFFFFFFFL);
2019
2060
end;
2020
2061
2021
2062
D.data ();
0 commit comments