Skip to content

Commit b3670d5

Browse files
gretay-js authored and
poechsel committed
Add intrinsics for rdtsc, rdpmc, crc32 (amd64) (#20)
1 parent a72cafb commit b3670d5

File tree

11 files changed

+111
-3
lines changed

11 files changed

+111
-3
lines changed

backend/amd64/CSE.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ method! class_of_operation op =
3232
| Ioffset_loc(_, _) -> Op_store true
3333
| Ifloatarithmem _ | Ifloatsqrtf _ -> Op_load
3434
| Ibswap _ | Isqrtf -> super#class_of_operation op
35+
| Irdtsc | Irdpmc -> Op_other
36+
| Icrc32q -> Op_pure
3537
end
3638
| _ -> super#class_of_operation op
3739

backend/amd64/arch.ml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
(* POPCNT instruction is not available prior to Nehalem, released in 2008. *)
1717
let popcnt_support = ref true
1818

19+
(* CRC32 instructions require SSE4.2 support; enabled by default. *)
20+
let crc32_support = ref true
21+
1922
(* Machine-specific command-line options *)
2023

2124
let command_line_options =
@@ -27,6 +30,10 @@ let command_line_options =
2730
" Use POPCNT instruction (not available prior to Nehalem)";
2831
"-fno-popcnt", Arg.Clear popcnt_support,
2932
" Do not use POPCNT instruction";
33+
"-fcrc32", Arg.Set crc32_support,
34+
" Use CRC32 instructions (requires SSE4.2 support)";
35+
"-fno-crc32", Arg.Clear crc32_support,
36+
" Do not emit CRC32 instructions";
3037
]
3138

3239
(* Specific operations for the AMD64 processor *)
@@ -54,6 +61,9 @@ type specific_operation =
5461
extension *)
5562
| Izextend32 (* 32 to 64 bit conversion with zero
5663
extension *)
64+
| Irdtsc (* read timestamp *)
65+
| Irdpmc (* read performance counter *)
66+
| Icrc32q (* compute crc *)
5767

5868
and float_operation =
5969
Ifloatadd | Ifloatsub | Ifloatmul | Ifloatdiv
@@ -141,6 +151,12 @@ let print_specific_operation printreg op ppf arg =
141151
fprintf ppf "sextend32 %a" printreg arg.(0)
142152
| Izextend32 ->
143153
fprintf ppf "zextend32 %a" printreg arg.(0)
154+
| Irdtsc ->
155+
fprintf ppf "rdtsc"
156+
| Irdpmc ->
157+
fprintf ppf "rdpmc %a" printreg arg.(0)
158+
| Icrc32q ->
159+
fprintf ppf "crc32 %a %a" printreg arg.(0) printreg arg.(1)
144160

145161
let win64 =
146162
match Config.system with

backend/amd64/emit.mlp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,26 @@ let emit_instr fallthrough i =
886886
| Lop(Iintop Ipopcnt) ->
887887
assert (!popcnt_support);
888888
I.popcnt (arg i 0) (res i 0)
889+
| Lop(Ispecific Irdtsc) ->
890+
assert (reg64 i.res.(0) = RDX);
891+
I.rdtsc ();
892+
(* The instruction fills in the low 32 bits of the result registers. *)
893+
(* Combine edx and eax into a single 64-bit result in rdx. *)
894+
I.sal (int 32) (res i 0); (* shift edx to the high part of rdx *)
895+
(* On processors that support the Intel 64 architecture,
896+
the high-order 32 bits of each of RAX and RDX are cleared. *)
897+
I.or_ rax (res i 0) (* combine high and low into rdx *)
898+
| Lop(Ispecific Irdpmc) ->
899+
assert ((arg64 i 0 = RCX) && (reg64 i.res.(0) = RDX));
900+
I.rdpmc ();
901+
(* The instruction fills in the low 32 bits of the result registers. *)
902+
(* Combine edx and eax into a single 64-bit result in rdx. *)
903+
I.sal (int 32) (res i 0); (* shift edx to the high part of rdx *)
904+
I.mov eax eax; (* zero-extend eax *)
905+
I.or_ rax (res i 0) (* combine high and low into rdx *)
906+
| Lop (Ispecific Icrc32q) ->
907+
assert (arg i 0 = res i 0);
908+
I.crc32 (arg i 1) (res i 0)
889909
| Lop (Iname_for_debugger _) -> ()
890910
| Lop (Iprobe _) ->
891911
let probe_label = new_label () in

backend/amd64/proc.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ let destroyed_at_oper = function
312312
-> [| rax |]
313313
| Iswitch(_, _) -> [| rax; rdx |]
314314
| Itrywith _ -> [| r11 |]
315+
| Iop(Ispecific (Irdtsc | Irdpmc)) -> [| rax |]
315316
| _ ->
316317
if fp then
317318
(* prevent any use of the frame pointer ! *)

backend/amd64/reload.ml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ open Mach
4646
Iintoffloat R S
4747
Ispecific(Ilea) R R R
4848
Ispecific(Ifloatarithmem) R R R
49+
Ispecific(Icrc32q) R R S (and Res = Arg1)
50+
Ispecific(Irdtsc) R (and Res = rdx)
51+
Ispecific(Irdpmc) R R (and Res = rdx, Arg1 = rcx)
4952
5053
Conditional branches:
5154
Iinttest S R
@@ -86,6 +89,16 @@ method! reload_operation op arg res =
8689
if stackp arg.(0)
8790
then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
8891
else (arg, res)
92+
| Ispecific (Irdtsc | Irdpmc) ->
93+
(* Irdtsc: res(0) already forced in reg.
94+
Irdpmc: res(0) and arg(0) already forced in regs. *)
95+
(arg, res)
96+
| Ispecific Icrc32q ->
97+
(* First argument and result must be in the same register.
98+
Second argument can be either in a register or on stack. *)
99+
if stackp arg.(0)
100+
then (let r = self#makereg arg.(0) in ([|r; arg.(1)|], [|r|]))
101+
else (arg, res)
89102
| Ifloatofint | Iintoffloat ->
90103
(* Result must be in register, but argument can be on stack *)
91104
(arg, (if stackp res.(0) then [| self#makereg res.(0) |] else res))
@@ -97,7 +110,15 @@ method! reload_operation op arg res =
97110
if !Clflags.pic_code || !Clflags.dlcode || Arch.win64
98111
then super#reload_operation op arg res
99112
else (arg, res)
100-
| _ -> (* Other operations: all args and results in registers *)
113+
| Iintop (Ipopcnt | Iclz _| Ictz _)
114+
| Ispecific (Isqrtf | Isextend32 | Izextend32 | Ilea _
115+
| Istore_int (_, _, _)
116+
| Ioffset_loc (_, _) | Ifloatarithmem (_, _)
117+
| Ibswap _| Ifloatsqrtf _)
118+
| Imove|Ispill|Ireload|Inegf|Iabsf|Iconst_float _|Icall_ind|Icall_imm _
119+
| Itailcall_ind|Itailcall_imm _|Iextcall _|Istackoffset _|Iload (_, _)
120+
| Istore (_, _, _)|Ialloc _|Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _
121+
-> (* Other operations: all args and results in registers *)
101122
super#reload_operation op arg res
102123

103124
method! reload_test tst arg =

backend/amd64/selection.ml

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ let pseudoregs_for_operation op arg res =
9696
([| rax |], [| rax |])
9797
(* For imulq, first arg must be in rax, rax is clobbered, and result is in
9898
rdx. *)
99+
| Ispecific (Ibswap _) -> assert false
99100
| Iintop(Imulh) ->
100101
([| rax; arg.(1) |], [| rdx |])
101102
| Ispecific(Ifloatarithmem(_,_)) ->
@@ -112,8 +113,30 @@ let pseudoregs_for_operation op arg res =
112113
([| rax; rcx |], [| rax |])
113114
| Iintop(Imod) ->
114115
([| rax; rcx |], [| rdx |])
116+
| Ispecific Irdtsc ->
117+
(* For rdtsc instruction, the result is in edx (high) and eax (low).
118+
Make it simple and force the result in rdx and rax clobbered. *)
119+
([| |], [| rdx |])
120+
| Ispecific Irdpmc ->
121+
(* For rdpmc instruction, the argument must be in ecx
122+
and the result is in edx (high) and eax (low).
123+
Make it simple and force the argument in rcx, the result in rdx,
124+
and rax clobbered *)
125+
([| rcx |], [| rdx |])
126+
| Ispecific Icrc32q ->
127+
(* arg.(0) and res.(0) must be the same *)
128+
([|res.(0); arg.(1)|], res)
115129
(* Other instructions are regular *)
116-
| _ -> raise Use_default
130+
| Iintop (Ipopcnt|Iclz _|Ictz _|Icomp _|Icheckbound)
131+
| Iintop_imm ((Imulh|Idiv|Imod|Icomp _|Icheckbound
132+
|Ipopcnt|Iclz _|Ictz _), _)
133+
| Ispecific (Isqrtf|Isextend32|Izextend32|Ilea _|Istore_int (_, _, _)
134+
|Ioffset_loc (_, _)|Ifloatsqrtf _)
135+
| Imove|Ispill|Ireload|Ifloatofint|Iintoffloat|Iconst_int _|Iconst_float _
136+
| Iconst_symbol _|Icall_ind|Icall_imm _|Itailcall_ind|Itailcall_imm _
137+
| Iextcall _|Istackoffset _|Iload (_, _)|Istore (_, _, _)|Ialloc _
138+
| Iname_for_debugger _|Iprobe _|Iprobe_is_enabled _
139+
-> raise Use_default
117140

118141
(* If you update [inline_ops], you may need to update [is_simple_expr] and/or
119142
[effects_of], below. *)
@@ -210,7 +233,17 @@ method! select_operation op args dbg =
210233
(Ispecific Isqrtf, [arg])
211234
| _ ->
212235
assert false
213-
end
236+
end
237+
| Cextcall { name; builtin = true; ret; ty_args = _; } ->
238+
begin match name, ret with
239+
| "caml_rdtsc_unboxed", [|Int|] -> Ispecific Irdtsc, args
240+
| "caml_rdpmc_unboxed", [|Int|] -> Ispecific Irdpmc, args
241+
| ("caml_int64_crc_unboxed", [|Int|]
242+
| "caml_int_crc_untagged", [|Int|]) when !Arch.crc32_support ->
243+
Ispecific Icrc32q, args
244+
| _ ->
245+
super#select_operation op args dbg
246+
end
214247
(* Recognize store instructions *)
215248
| Cstore ((Word_int|Word_val as chunk), _init) ->
216249
begin match args with

backend/x86_ast.mli

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ type instruction =
108108
| CMP of arg * arg
109109
| COMISD of arg * arg
110110
| CQO
111+
| CRC32 of arg * arg
111112
| CVTSD2SI of arg * arg
112113
| CVTSD2SS of arg * arg
113114
| CVTSI2SD of arg * arg
@@ -172,6 +173,8 @@ type instruction =
172173
| POP of arg
173174
| POPCNT of arg * arg
174175
| PUSH of arg
176+
| RDTSC
177+
| RDPMC
175178
| RET
176179
| ROUNDSD of rounding * arg * arg
177180
| SAL of arg * arg

backend/x86_dsl.ml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ module I = struct
122122
let cmp x y = emit (CMP (x, y))
123123
let comisd x y = emit (COMISD (x, y))
124124
let cqo () = emit CQO
125+
let crc32 x y = emit (CRC32 (x, y))
125126
let cvtsd2ss x y = emit (CVTSD2SS (x, y))
126127
let cvtsi2sd x y = emit (CVTSI2SD (x, y))
127128
let cvtss2sd x y = emit (CVTSS2SD (x, y))
@@ -190,6 +191,8 @@ module I = struct
190191
let pop x = emit (POP x)
191192
let popcnt x y = emit (POPCNT (x, y))
192193
let push x = emit (PUSH x)
194+
let rdtsc () = emit RDTSC
195+
let rdpmc () = emit RDPMC
193196
let ret () = emit RET
194197
let sal x y = emit (SAL (x, y))
195198
let sar x y = emit (SAR (x, y))

backend/x86_dsl.mli

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ module I : sig
115115
val cmp: arg -> arg -> unit
116116
val comisd: arg -> arg -> unit
117117
val cqo: unit -> unit
118+
val crc32 : arg -> arg -> unit
118119
val cvtsd2ss: arg -> arg -> unit
119120
val cvtsi2sd: arg -> arg -> unit
120121
val cvtss2sd: arg -> arg -> unit
@@ -183,6 +184,8 @@ module I : sig
183184
val pop: arg -> unit
184185
val popcnt : arg -> arg -> unit
185186
val push: arg -> unit
187+
val rdtsc: unit -> unit
188+
val rdpmc: unit -> unit
186189
val ret: unit -> unit
187190
val sal: arg -> arg -> unit
188191
val sar: arg -> arg -> unit

backend/x86_gas.ml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ let print_instr b = function
131131
| CMP (arg1, arg2) -> i2_s b "cmp" arg1 arg2
132132
| COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2
133133
| CQO -> i0 b "cqto"
134+
| CRC32 (arg1, arg2) -> i2_s b "crc32" arg1 arg2
134135
| CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2
135136
| CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2
136137
| CVTSI2SD (arg1, arg2) -> i2 b ("cvtsi2sd" ^ suf arg1) arg1 arg2
@@ -207,6 +208,8 @@ let print_instr b = function
207208
| POP arg -> i1_s b "pop" arg
208209
| POPCNT (arg1, arg2) -> i2_s b "popcnt" arg1 arg2
209210
| PUSH arg -> i1_s b "push" arg
211+
| RDTSC -> i0 b "rdtsc"
212+
| RDPMC -> i0 b "rdpmc"
210213
| RET -> i0 b "ret"
211214
| ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2
212215
| SAL (arg1, arg2) -> i2_s b "sal" arg1 arg2

backend/x86_masm.ml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ let print_instr b = function
128128
| CMP (arg1, arg2) -> i2 b "cmp" arg1 arg2
129129
| COMISD (arg1, arg2) -> i2 b "comisd" arg1 arg2
130130
| CQO -> i0 b "cqo"
131+
(* MASM uses Intel syntax: the operand size is taken from the operands, so
   the mnemonic carries no AT&T-style size suffix (unlike the GAS printer,
   which appends one via [i2_s]). *)
| CRC32 (arg1, arg2) -> i2 b "crc32" arg1 arg2
131132
| CVTSD2SI (arg1, arg2) -> i2 b "cvtsd2si" arg1 arg2
132133
| CVTSD2SS (arg1, arg2) -> i2 b "cvtsd2ss" arg1 arg2
133134
| CVTSI2SD (arg1, arg2) -> i2 b "cvtsi2sd" arg1 arg2
@@ -199,6 +200,8 @@ let print_instr b = function
199200
| POP arg -> i1 b "pop" arg
200201
| POPCNT (arg1, arg2) -> i2 b "popcnt" arg1 arg2
201202
| PUSH arg -> i1 b "push" arg
203+
| RDTSC -> i0 b "rdtsc"
204+
| RDPMC -> i0 b "rdpmc"
202205
| RET -> i0 b "ret"
203206
| ROUNDSD (r, arg1, arg2) -> i2 b (string_of_rounding r) arg1 arg2
204207
| SAL (arg1, arg2) -> i2 b "sal" arg1 arg2

0 commit comments

Comments
 (0)