Skip to content

Commit 5f12cc9

Browse files
authored
[NVPTX] Add builtin support for 'nanosleep' PTX instruction (#79888)
Summary: This patch adds a builtin for the `nanosleep` PTX instruction. It takes a delay t, given either as an immediate or in a register, and sleeps for a duration in the range [0, 2t] nanoseconds. More information is available in the documentation: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-nanosleep
1 parent d492faa commit 5f12cc9

File tree

5 files changed

+42
-0
lines changed

5 files changed

+42
-0
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
157157
// MISC
158158

159159
BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
160+
// Builtin for the PTX nanosleep instruction; gated on AND(SM_70, PTX63).
TARGET_BUILTIN(__nvvm_nanosleep, "vi", "n", AND(SM_70, PTX63))
160161

161162
// Min Max
162163

clang/test/CodeGen/builtins-nvptx.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,17 @@ __device__ void nvvm_vote(int pred) {
818818
// CHECK: ret void
819819
}
820820

821+
// CHECK-LABEL: nvvm_nanosleep
822+
// Calls __nvvm_nanosleep once with the runtime argument `d` and once with the
// constant 1; each call is expected to appear as a call to
// @llvm.nvvm.nanosleep in the emitted IR.
// NOTE(review): the checks below use the PTX70/SM80 check prefix while the
// body is guarded by __CUDA_ARCH__ >= 700 -- confirm against the RUN lines
// that a configuration actually verifies these checks.
__device__ void nvvm_nanosleep(int d) {
823+
#if __CUDA_ARCH__ >= 700
824+
// CHECK_PTX70_SM80: call void @llvm.nvvm.nanosleep
825+
__nvvm_nanosleep(d);
826+
827+
// CHECK_PTX70_SM80: call void @llvm.nvvm.nanosleep
828+
__nvvm_nanosleep(1);
829+
#endif
830+
}
831+
821832
// CHECK-LABEL: nvvm_mbarrier
822833
__device__ void nvvm_mbarrier(long long* addr, __attribute__((address_space(3))) long long* sharedAddr, int count, long long state) {
823834
#if __CUDA_ARCH__ >= 800

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,10 @@ let TargetPrefix = "nvvm" in {
557557
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
558558
[IntrNoMem, IntrSpeculatable]>;
559559

560+
// Intrinsic tied to the __nvvm_nanosleep Clang builtin. It takes a single i32
// operand and produces no result. It is marked convergent and side-effecting
// while accessing no memory.
def int_nvvm_nanosleep : ClangBuiltin<"__nvvm_nanosleep">,
561+
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
562+
[IntrConvergent, IntrNoMem, IntrHasSideEffects]>;
563+
560564
//
561565
// Min Max
562566
//

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,12 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
640640
def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
641641
Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
642642

643+
// Immediate-operand form: selects int_nvvm_nanosleep applied to a constant and
// emits "nanosleep.u32 <imm>;". No outputs. Requires PTX 63 and SM 70.
def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$i;",
644+
[(int_nvvm_nanosleep imm:$i)]>,
645+
Requires<[hasPTX<63>, hasSM<70>]>;
646+
// Register-operand form: selects int_nvvm_nanosleep applied to an Int32Regs
// value and emits "nanosleep.u32 <reg>;". No outputs. Requires PTX 63 and SM 70.
def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;",
647+
[(int_nvvm_nanosleep Int32Regs:$i)]>,
648+
Requires<[hasPTX<63>, hasSM<70>]>;
643649
//
644650
// Min Max
645651
//

llvm/test/CodeGen/NVPTX/nanosleep.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; RUN: llc < %s -march=nvptx64 -O2 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s
2+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify %}
3+
4+
declare void @llvm.nvvm.nanosleep(i32)
5+
6+
; CHECK-LABEL: test_nanosleep_r
7+
; Register case: the delay is the function argument %d, so the emitted
; nanosleep.u32 is expected to take a register operand.
define void @test_nanosleep_r(i32 noundef %d) {
8+
entry:
9+
; CHECK: nanosleep.u32 %[[REG:.+]];
10+
call void @llvm.nvvm.nanosleep(i32 %d)
11+
ret void
12+
}
13+
14+
; CHECK-LABEL: test_nanosleep_i
15+
; Immediate case: the delay is the constant 42, so the emitted nanosleep.u32
; is expected to carry the literal 42 as its operand.
define void @test_nanosleep_i() {
16+
entry:
17+
; CHECK: nanosleep.u32 42;
18+
call void @llvm.nvvm.nanosleep(i32 42)
19+
ret void
20+
}

0 commit comments

Comments
 (0)