diff --git a/backend/amd64/CSE.ml b/backend/amd64/CSE.ml index 60503d69ce1..9e7e31e2b13 100644 --- a/backend/amd64/CSE.ml +++ b/backend/amd64/CSE.ml @@ -32,6 +32,8 @@ method! class_of_operation op = | Ioffset_loc(_, _) -> Op_store true | Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load | Ibswap _ | Isqrtf -> super#class_of_operation op + | Irdtsc | Irdpmc -> Op_other + | Icrc32q -> Op_pure end | _ -> super#class_of_operation op diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml index 9c8bc887712..1f6dfd6294e 100644 --- a/backend/amd64/arch.ml +++ b/backend/amd64/arch.ml @@ -16,6 +16,9 @@ (* POPCNT instruction is not available prior to Nehalem, released in 2008. *) let popcnt_support = ref true +(* CRC32 requires SSE 4.2 support *) +let crc32_support = ref true + (* Machine-specific command-line options *) let command_line_options = @@ -27,6 +30,10 @@ let command_line_options = " Use POPCNT instruction (not available prior to Nehalem)"; "-fno-popcnt", Arg.Clear popcnt_support, " Do not use POPCNT instruction"; + "-fcrc32", Arg.Set crc32_support, + " Use CRC32 instructions (requires SSE4.2 support)"; + "-fno-crc32", Arg.Clear crc32_support, + " Do not emit CRC32 instructions"; ] (* Specific operations for the AMD64 processor *) @@ -54,6 +61,9 @@ type specific_operation = extension *) | Izextend32 (* 32 to 64 bit conversion with zero extension *) + | Irdtsc (* read timestamp *) + | Irdpmc (* read performance counter *) + | Icrc32q (* compute crc *) and float_operation = Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv @@ -143,6 +153,12 @@ let print_specific_operation printreg op ppf arg = fprintf ppf "sextend32 %a" printreg arg.(0) | Izextend32 -> fprintf ppf "zextend32 %a" printreg arg.(0) + | Irdtsc -> + fprintf ppf "rdtsc" + | Irdpmc -> + fprintf ppf "rdpmc %a" printreg arg.(0) + | Icrc32q -> + fprintf ppf "crc32 %a %a" printreg arg.(0) printreg arg.(1) let win64 = match Config.system with diff --git a/backend/amd64/emit.mlp b/backend/amd64/emit.mlp index 31aa7b4e13a..8baf37191b1 100644 --- a/backend/amd64/emit.mlp +++ b/backend/amd64/emit.mlp @@ -958,6 +958,26 @@ let emit_instr fallthrough i = | Lop(Iintop Ipopcnt) -> assert (!popcnt_support); I.popcnt (arg i 0) (res i 0) + | Lop(Ispecific Irdtsc) -> + assert (reg64 i.res.(0) = RDX); + I.rdtsc (); + (* The instruction fills in the low 32 bits of the result registers. *) + (* Combine edx and eax into a single 64-bit result in rdx. *) + I.sal (int 32) (res i 0); (* shift edx to the high part of rdx *) + (* On processors that support the Intel 64 architecture, + the high-order 32 bits of each of RAX and RDX are cleared. *) + I.or_ rax (res i 0) (* combine high and low into rdx *) + | Lop(Ispecific Irdpmc) -> + assert ((arg64 i 0 = RCX) && (reg64 i.res.(0) = RDX)); + I.rdpmc (); + (* The instruction fills in the low 32 bits of the result registers. *) + (* Combine edx and eax into a single 64-bit result in rdx. *) + I.sal (int 32) (res i 0); (* shift edx to the high part of rdx *) + I.mov eax eax; (* zero-extend eax *) + I.or_ rax (res i 0) (* combine high and low into rdx *) + | Lop (Ispecific Icrc32q) -> + assert (arg i 0 = res i 0); + I.crc32 (arg i 1) (res i 0) | Lop (Iname_for_debugger _) -> () | Lop (Iprobe _) -> let probe_label = new_label () in diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml index c6b6587ec69..e31c1ff7516 100644 --- a/backend/amd64/proc.ml +++ b/backend/amd64/proc.ml @@ -332,6 +332,7 @@ let destroyed_at_oper = function [| loc_spacetime_node_hole |] | Iswitch(_, _) -> [| rax; rdx |] | Itrywith _ -> [| r11 |] + | Iop(Ispecific (Irdtsc | Irdpmc)) -> [| rax |] | _ -> if fp then (* prevent any use of the frame pointer ! *) diff --git a/backend/amd64/reload.ml b/backend/amd64/reload.ml index 16819c09bff..b7501c00d21 100644 --- a/backend/amd64/reload.ml +++ b/backend/amd64/reload.ml @@ -46,6 +46,9 @@ open Mach Iintoffloat R S Ispecific(Ilea) R R R Ispecific(Ifloatarithmem) R R R + Ispecific(Icrc32q) R R S (and Res = Arg1) + Ispecific(Irdtsc) R (and Res = rdx) + Ispecific(Irdpmc) R R (and Res = rdx, Arg1 = rcx) Conditional branches: Iinttest S R @@ -86,6 +89,16 @@ method! reload_operation op arg res = if stackp arg.(0) then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|])) else (arg, res) + | Ispecific (Irdtsc | Irdpmc) -> + (* Irdtsc: res(0) already forced in reg. + Irdpmc: res(0) and arg(0) already forced in regs. *) + (arg, res) + | Ispecific Icrc32q -> + (* First argument and result must be in the same register. + Second argument can be either in a register or on stack. *) + if stackp arg.(0) + then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|])) + else (arg, res) | Ifloatofint | Iintoffloat -> (* Result must be in register, but argument can be on stack *) (arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res)) @@ -97,7 +110,15 @@ method! reload_operation op arg res = if !Clflags.pic_code || !Clflags.dlcode || Arch.win64 then super#reload_operation op arg res else (arg, res) - | _ -> (* Other operations: all args and results in registers *) + | Iintop (Ipopcnt | Iclz _| Ictz _) + | Ispecific (Isqrtf | Isextend32 | Izextend32 | Ilea _ + | Istore_int (_, _, _) + | Ioffset_loc (_, _) | Ifloatarithmem (_, _) + | Ibswap _| Ifloatsqrtf _) + | Imove|Ispill|Ireload|Inegf|Iabsf|Iconst_float _|Icall_ind _|Icall_imm _ + | Itailcall_ind _|Itailcall_imm _|Iextcall _|Istackoffset _|Iload (_, _) + | Istore (_, _, _)|Ialloc _|Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _ + -> (* Other operations: all args and results in registers *) super#reload_operation op arg res method! reload_test tst arg = diff --git a/backend/amd64/selection.ml b/backend/amd64/selection.ml index 52b1b2478d1..a6f9ad404e4 100644 --- a/backend/amd64/selection.ml +++ b/backend/amd64/selection.ml @@ -96,6 +96,7 @@ let pseudoregs_for_operation op arg res = ([| rax |], [| rax |]) (* For imulq, first arg must be in rax, rax is clobbered, and result is in rdx. *) + | Ispecific (Ibswap _) -> assert false | Iintop(Imulh) -> ([| rax; arg.(1) |], [| rdx |]) | Ispecific(Ifloatarithmem(_,_)) -> @@ -112,8 +113,30 @@ let pseudoregs_for_operation op arg res = ([| rax; rcx |], [| rax |]) | Iintop(Imod) -> ([| rax; rcx |], [| rdx |]) + | Ispecific Irdtsc -> + (* For rdtsc instruction, the result is in edx (high) and eax (low). + Make it simple and force the result in rdx and rax clobbered. *) + ([| |], [| rdx |]) + | Ispecific Irdpmc -> + (* For rdpmc instruction, the argument must be in ecx + and the result is in edx (high) and eax (low). + Make it simple and force the argument in rcx, the result in rdx, + and rax clobbered *) + ([| rcx |], [| rdx |]) + | Ispecific Icrc32q -> + (* arg.(0) and res.(0) must be the same *) + ([|res.(0); arg.(1)|], res) (* Other instructions are regular *) - | _ -> raise Use_default + | Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound _) + | Iintop_imm ((Imulh|Idiv|Imod|Icomp _|Icheckbound _ + |Ipopcnt|Iclz _|Ictz _), _) + | Ispecific (Isqrtf|Isextend32|Izextend32|Ilea _|Istore_int (_, _, _) + |Ioffset_loc (_, _)|Ifloatsqrtf _) + | Imove|Ispill|Ireload|Ifloatofint|Iintoffloat|Iconst_int _|Iconst_float _ + | Iconst_symbol _|Icall_ind _|Icall_imm _|Itailcall_ind _|Itailcall_imm _ + | Iextcall _|Istackoffset _|Iload (_, _)|Istore (_, _, _)|Ialloc _ + | Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _ + -> raise Use_default (* If you update [inline_ops], you may need to update [is_simple_expr] and/or [effects_of], below. *) @@ -210,7 +233,17 @@ method! select_operation op args dbg = (Ispecific Isqrtf, [arg]) | _ -> assert false - end + end + | Cextcall { name; builtin = true; ret; label_after } -> + begin match name, ret with + | "caml_rdtsc_unboxed", [|Int|] -> Ispecific Irdtsc, args + | "caml_rdpmc_unboxed", [|Int|] -> Ispecific Irdpmc, args + | ("caml_int64_crc_unboxed", [|Int|] + | "caml_int_crc_untagged", [|Int|]) when !Arch.crc32_support -> + Ispecific Icrc32q, args + | _ -> + super#select_operation op args dbg + end (* Recognize store instructions *) | Cstore ((Word_int|Word_val as chunk), _init) -> begin match args with diff --git a/backend/x86_ast.mli b/backend/x86_ast.mli index a457f623b33..fce75bac0fb 100644 --- a/backend/x86_ast.mli +++ b/backend/x86_ast.mli @@ -108,6 +108,7 @@ type instruction = | CMP of arg * arg | COMISD of arg * arg | CQO + | CRC32 of arg * arg | CVTSD2SI of arg * arg | CVTSD2SS of arg * arg | CVTSI2SD of arg * arg @@ -172,6 +173,8 @@ type instruction = | POP of arg | POPCNT of arg * arg | PUSH of arg + | RDTSC + | RDPMC | RET | ROUNDSD of rounding * arg * arg | SAL of arg * arg diff --git a/backend/x86_dsl.ml b/backend/x86_dsl.ml index d28f3f9882d..e2ad7010bcf 100644 --- a/backend/x86_dsl.ml +++ b/backend/x86_dsl.ml @@ -122,6 +122,7 @@ module I = struct let cmp x y = emit (CMP (x, y)) let comisd x y = emit (COMISD (x, y)) let cqo () = emit CQO + let crc32 x y = emit (CRC32 (x, y)) let cvtsd2ss x y = emit (CVTSD2SS (x, y)) let cvtsi2sd x y = emit (CVTSI2SD (x, y)) let cvtss2sd x y = emit (CVTSS2SD (x, y)) @@ -190,6 +191,8 @@ module I = struct let pop x = emit (POP x) let popcnt x y = emit (POPCNT (x, y)) let push x = emit (PUSH x) + let rdtsc () = emit (RDTSC) + let rdpmc () = emit (RDPMC) let ret () = emit RET let sal x y = emit (SAL (x, y)) let sar x y = emit (SAR (x, y)) diff --git a/backend/x86_dsl.mli b/backend/x86_dsl.mli index c4db064c85c..524a14474da 100644 --- a/backend/x86_dsl.mli +++ b/backend/x86_dsl.mli @@ -115,6 +115,7 @@ module I : sig val cmp: arg -> arg -> unit val comisd: arg -> arg -> unit val cqo: unit -> unit + val crc32 : arg -> arg -> unit val cvtsd2ss: arg -> arg -> unit val cvtsi2sd: arg -> arg -> unit val cvtss2sd: arg -> arg -> unit @@ -183,6 +184,8 @@ module I : sig val pop: arg -> unit val popcnt : arg -> arg -> unit val push: arg -> unit + val rdtsc: unit -> unit + val rdpmc: unit -> unit val ret: unit -> unit val sal: arg -> arg -> unit val sar: arg -> arg -> unit diff --git a/backend/x86_gas.ml b/backend/x86_gas.ml index d1ee4b53783..a9b639696fc 100644 --- a/backend/x86_gas.ml +++ b/backend/x86_gas.ml @@ -131,6 +131,7 @@ let print_instr b = function | CMP (arg1, arg2) -> i2_s b "cmp" arg1 arg2 | COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2 | CQO -> i0 b "cqto" + | CRC32 (arg1, arg2) -> i2_s b "crc32" arg1 arg2 | CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2 | CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2 | CVTSI2SD (arg1, arg2) -> i2 b ("cvtsi2sd" ^ suf arg1) arg1 arg2 @@ -207,6 +208,8 @@ let print_instr b = function | POP arg -> i1_s b "pop" arg | POPCNT (arg1, arg2) -> i2_s b "popcnt" arg1 arg2 | PUSH arg -> i1_s b "push" arg + | RDTSC -> i0 b "rdtsc" + | RDPMC -> i0 b "rdpmc" | RET -> i0 b "ret" | ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2 | SAL (arg1, arg2) -> i2_s b "sal" arg1 arg2 diff --git a/backend/x86_masm.ml b/backend/x86_masm.ml index 263f6185360..5c4204b865e 100644 --- a/backend/x86_masm.ml +++ b/backend/x86_masm.ml @@ -128,6 +128,7 @@ let print_instr b = function | CMP (arg1, arg2) -> i2 b "cmp" arg1 arg2 | COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2 | CQO -> i0 b "cqo" + | CRC32 (arg1, arg2) -> i2 b "crc32q" arg1 arg2 | CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2 | CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2 | CVTSI2SD (arg1, arg2) -> i2 b "cvtsi2sd" arg1 arg2 @@ -199,6 +200,8 @@ let print_instr b = function | POP arg -> i1 b "pop" arg | POPCNT (arg1, arg2) -> i2 b "popcnt" arg1 arg2 | PUSH arg -> i1 b "push" arg + | RDTSC -> i0 b "rdtsc" + | RDPMC -> i0 b "rdpmc" | RET -> i0 b "ret" | ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2 | SAL (arg1, arg2) -> i2 b "sal" arg1 arg2