Skip to content

Commit e633807

Browse files
authored
[NVPTX] Add builtin support for 'globaltimer' (#79765)
Summary: This patch adds support for the `globaltimer` special register, to match `clock` and `clock64`. See the PTX ISA reference (linked below) for details. For brevity, this patch does not implement the `hi` or `lo` variants, as they can be obtained from the 64-bit `globaltimer` value at the cost of an additional register. Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer-globaltimer-lo-globaltimer-hi
1 parent ea80140 commit e633807

File tree

5 files changed

+20
-1
lines changed

5 files changed

+20
-1
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
148148

149149
BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
150150
BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
151+
BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")
151152

152153
BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
153154
BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")

clang/test/CodeGen/builtins-nvptx.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,13 @@ __device__ long long read_clocks() {
134134

135135
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
136136
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
137+
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
137138

138139
int a = __nvvm_read_ptx_sreg_clock();
139140
long long b = __nvvm_read_ptx_sreg_clock64();
141+
long long c = __nvvm_read_ptx_sreg_globaltimer();
140142

141-
return a + b;
143+
return a + b + c;
142144
}
143145

144146
__device__ int read_pms() {

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4510,6 +4510,8 @@ def int_nvvm_read_ptx_sreg_lanemask_gt :
45104510
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
45114511
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
45124512

4513+
def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">;
4514+
45134515
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
45144516
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
45154517
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6376,6 +6376,8 @@ def INT_PTX_SREG_CLOCK :
63766376
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
63776377
def INT_PTX_SREG_CLOCK64 :
63786378
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6379+
def INT_PTX_SREG_GLOBALTIMER :
6380+
PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
63796381

63806382
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
63816383
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;

llvm/test/CodeGen/NVPTX/intrinsics.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,17 @@ define void @test_exit() {
140140
ret void
141141
}
142142

143+
; CHECK-LABEL: test_globaltimer
144+
define i64 @test_globaltimer() {
145+
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
146+
%a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
147+
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
148+
%b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
149+
%ret = add i64 %a, %b
150+
; CHECK: ret
151+
ret i64 %ret
152+
}
153+
143154
declare float @llvm.fabs.f32(float)
144155
declare double @llvm.fabs.f64(double)
145156
declare float @llvm.nvvm.sqrt.f(float)
@@ -154,3 +165,4 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
154165
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
155166
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
156167
declare void @llvm.nvvm.exit()
168+
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()

0 commit comments

Comments
 (0)