Skip to content

Commit e225677

Browse files
authored
[NVPTX] Update setmaxnreg intrinsic lowering (#125846)
The setmaxnreg PTX instruction is supported on all architecture-conditional targets from sm_90 onwards (as known up to CUDA 12.8). This patch updates the predicate checks to reflect this. The feature is additionally tested in setmaxnreg-sm100a.ll.
1 parent f2ac265 commit e225677

File tree

3 files changed

+15
-1
lines changed

3 files changed

+15
-1
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def hasLDU : Predicate<"Subtarget->hasLDU()">;
142142
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
143143
def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;
144144
def hasOptEnabled : Predicate<"TM.getOptLevel() != CodeGenOptLevel::None">;
145+
def hasAcceleratedFeatures : Predicate<"Subtarget->hasAAFeatures()">;
145146

146147
def doF32FTZ : Predicate<"useF32FTZ()">;
147148
def doNoF32FTZ : Predicate<"!useF32FTZ()">;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7547,7 +7547,7 @@ multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
75477547
def : NVPTXInst<(outs), (ins i32imm:$reg_count),
75487548
"setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
75497549
[(Intr timm:$reg_count)]>,
7550-
Requires<[hasSM90a, hasPTX<80>]>;
7550+
Requires<[hasAcceleratedFeatures, hasSM<90>, hasPTX<80>]>;
75517551
}
75527552

75537553
defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
setmaxnreg-sm100a.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | FileCheck --check-prefixes=CHECK %s
2+
; RUN: %if ptxas-12.8 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %}
3+
4+
; CHECK-LABEL: test_set_maxn_reg_sm100a
5+
define void @test_set_maxn_reg_sm100a() {
6+
; CHECK: setmaxnreg.inc.sync.aligned.u32 96;
7+
call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 96)
8+
9+
; CHECK: setmaxnreg.dec.sync.aligned.u32 64;
10+
call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 64)
11+
12+
ret void
13+
}

0 commit comments

Comments
 (0)