@@ -81,6 +81,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
81
81
: RegisterRegAllocBase(N, D, C) {}
82
82
};
83
83
84
+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
85
+ public:
86
+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
87
+ : RegisterRegAllocBase(N, D, C) {}
88
+ };
89
+
84
90
static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
85
91
const MachineRegisterInfo &MRI,
86
92
const Register Reg) {
@@ -95,13 +101,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
95
101
return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
96
102
}
97
103
98
- // / -{sgpr|vgpr}-regalloc=... command line option.
104
+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
105
+ const MachineRegisterInfo &MRI,
106
+ const Register Reg) {
107
+ const SIMachineFunctionInfo *MFI =
108
+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
109
+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
110
+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
111
+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
112
+ }
113
+
114
+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
99
115
static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
100
116
101
117
// / A dummy default pass factory indicates whether the register allocator is
102
118
// / overridden on the command line.
103
119
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
104
120
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
121
+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
105
122
106
123
static SGPRRegisterRegAlloc
107
124
defaultSGPRRegAlloc (" default" ,
@@ -118,6 +135,12 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
118
135
VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
119
136
cl::desc (" Register allocator to use for VGPRs" ));
120
137
138
/// Hidden command-line option holding the pass constructor to use for WWM
/// register allocation. Values are parsed through
/// RegisterPassParser<WWMRegisterRegAlloc>, and the option is initialized to
/// useDefaultRegisterAllocator.
+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
139
+ RegisterPassParser<WWMRegisterRegAlloc>>
140
+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
141
+ cl::init (&useDefaultRegisterAllocator),
142
+ cl::desc(" Register allocator to use for WWM registers" ));
143
+
121
144
static void initializeDefaultSGPRRegisterAllocatorOnce () {
122
145
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
123
146
@@ -136,6 +159,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
136
159
}
137
160
}
138
161
162
+ static void initializeDefaultWWMRegisterAllocatorOnce () {
163
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
164
+
165
+ if (!Ctor) {
166
+ Ctor = WWMRegAlloc;
167
+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
168
+ }
169
+ }
170
+
139
171
/// Builds a basic register allocator whose register filter is
/// onlyAllocateSGPRs.
static FunctionPass *createBasicSGPRRegisterAllocator() {
  FunctionPass *const Allocator =
      createBasicRegisterAllocator(onlyAllocateSGPRs);
  return Allocator;
}
@@ -160,6 +192,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
160
192
return createFastRegisterAllocator (onlyAllocateVGPRs, true );
161
193
}
162
194
195
+ static FunctionPass *createBasicWWMRegisterAllocator () {
196
+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
197
+ }
198
+
199
+ static FunctionPass *createGreedyWWMRegisterAllocator () {
200
+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
201
+ }
202
+
203
+ static FunctionPass *createFastWWMRegisterAllocator () {
204
+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
205
+ }
206
+
163
207
/// Static SGPRRegisterRegAlloc object tying a name and a description string to
/// the createBasicSGPRRegisterAllocator factory.
static SGPRRegisterRegAlloc basicRegAllocSGPR (
164
208
" basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
165
209
static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -176,7 +220,16 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
176
220
177
221
/// Static VGPRRegisterRegAlloc object tying a name and a description string to
/// the createFastVGPRRegisterAllocator factory.
static VGPRRegisterRegAlloc fastRegAllocVGPR (
178
222
" fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
179
- }
223
+
224
/// Static WWMRegisterRegAlloc objects, one per WWM allocator factory (basic,
/// greedy, fast), each pairing a name and a description string with its
/// factory function.
/// NOTE(review): presumably these make the names selectable through the WWM
/// regalloc option; the registration logic lives in RegisterRegAllocBase,
/// which is not part of this excerpt.
+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
225
+ " basic register allocator" ,
226
+ createBasicWWMRegisterAllocator);
227
+ static WWMRegisterRegAlloc
228
+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
229
+ createGreedyWWMRegisterAllocator);
230
+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
231
+ createFastWWMRegisterAllocator);
232
+ } // namespace
180
233
181
234
static cl::opt<bool >
182
235
EnableEarlyIfConversion (" amdgpu-early-ifcvt" , cl::Hidden,
@@ -417,6 +470,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
417
470
initializeAMDGPULateCodeGenPreparePass (*PR);
418
471
initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
419
472
initializeAMDGPULowerModuleLDSLegacyPass (*PR);
473
+ initializeAMDGPUReserveWWMRegsPass (*PR);
420
474
initializeAMDGPURewriteOutArgumentsPass (*PR);
421
475
initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
422
476
initializeAMDGPUUnifyMetadataPass (*PR);
@@ -1002,6 +1056,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
1002
1056
1003
1057
FunctionPass *createSGPRAllocPass (bool Optimized);
1004
1058
FunctionPass *createVGPRAllocPass (bool Optimized);
1059
+ FunctionPass *createWWMRegAllocPass (bool Optimized);
1005
1060
FunctionPass *createRegAllocPass (bool Optimized) override ;
1006
1061
1007
1062
bool addRegAssignAndRewriteFast () override ;
@@ -1394,7 +1449,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
1394
1449
}
1395
1450
1396
1451
bool GCNPassConfig::addPreRewrite () {
1397
- addPass (&SILowerWWMCopiesID);
1398
1452
if (EnableRegReassign)
1399
1453
addPass (&GCNNSAReassignID);
1400
1454
return true ;
@@ -1430,12 +1484,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1430
1484
return createFastVGPRRegisterAllocator ();
1431
1485
}
1432
1486
1487
+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1488
+ // Initialize the global default.
1489
+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1490
+ initializeDefaultWWMRegisterAllocatorOnce);
1491
+
1492
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1493
+ if (Ctor != useDefaultRegisterAllocator)
1494
+ return Ctor ();
1495
+
1496
+ if (Optimized)
1497
+ return createGreedyWWMRegisterAllocator ();
1498
+
1499
+ return createFastWWMRegisterAllocator ();
1500
+ }
1501
+
1433
1502
/// Override that is never expected to run: its whole body is an
/// llvm_unreachable. The pipeline code visible in this file calls
/// createWWMRegAllocPass and createVGPRAllocPass directly rather than this
/// hook; \p Optimized is ignored.
FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
1434
1503
llvm_unreachable (" should not be used" );
1435
1504
}
1436
1505
1437
1506
/// Message stating that -regalloc is not supported for amdgcn and naming the
/// per-register-kind options to use instead.
/// NOTE(review): the use site is outside this excerpt; presumably it is
/// reported when usingDefaultRegAlloc() is false. Confirm against the callers.
static const char RegAllocOptNotSupportedMessage[] =
1438
- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1507
+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1508
+ " and -vgpr-regalloc" ;
1439
1509
1440
1510
bool GCNPassConfig::addRegAssignAndRewriteFast () {
1441
1511
if (!usingDefaultRegAlloc ())
@@ -1447,11 +1517,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
1447
1517
1448
1518
// Equivalent of PEI for SGPRs.
1449
1519
addPass (&SILowerSGPRSpillsID);
1520
+
1521
+ // Allocate WWM registers used in whole quad mode operations (for shaders).
1450
1522
addPass (&SIPreAllocateWWMRegsID);
1451
1523
1452
- addPass (createVGPRAllocPass (false ));
1524
+ // For allocating other wwm register operands.
1525
+ addPass (createWWMRegAllocPass (false ));
1453
1526
1454
1527
addPass (&SILowerWWMCopiesID);
1528
+ addPass (&AMDGPUReserveWWMRegsID);
1529
+
1530
+ // For allocating regular VGPRs.
1531
+ addPass (createVGPRAllocPass (false ));
1532
+
1455
1533
return true ;
1456
1534
}
1457
1535
@@ -1471,8 +1549,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1471
1549
1472
1550
// Equivalent of PEI for SGPRs.
1473
1551
addPass (&SILowerSGPRSpillsID);
1552
+
1553
+ // Allocate WWM registers used in whole quad mode operations (for shaders).
1474
1554
addPass (&SIPreAllocateWWMRegsID);
1475
1555
1556
+ // For allocating other whole wave mode registers.
1557
+ addPass (createWWMRegAllocPass (true ));
1558
+ addPass (&SILowerWWMCopiesID);
1559
+ addPass (createVirtRegRewriter (false ));
1560
+ addPass (&AMDGPUReserveWWMRegsID);
1561
+
1562
+ // For allocating regular VGPRs.
1476
1563
addPass (createVGPRAllocPass (true ));
1477
1564
1478
1565
addPreRewrite ();
0 commit comments