Skip to content

Commit 3bf3e2d

Browse files
committed
Add base case for work-group (wg) size
Fix bug that was causing an infinite loop in the helper function
1 parent 2447fe1 commit 3bf3e2d

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

source/ur/ur.hpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,8 @@ template <typename T> class Result {
329329
static inline void
330330
roundToHighestFactorOfGlobalSize(size_t &ThreadsPerBlockInDim,
331331
const size_t GlobalWorkSizeInDim) {
332-
while (GlobalWorkSizeInDim % ThreadsPerBlockInDim) {
332+
while (ThreadsPerBlockInDim > 1 &&
333+
GlobalWorkSizeInDim % ThreadsPerBlockInDim) {
333334
--ThreadsPerBlockInDim;
334335
}
335336
}
@@ -359,8 +360,10 @@ static inline void roundToHighestFactorOfGlobalSizeIn3d(
359360

360361
ThreadsPerBlock[0] = std::min(
361362
GlobalSize[0], MaxBlockSize / (ThreadsPerBlock[1] * ThreadsPerBlock[2]));
363+
362364
// Make the X dim a power of 2
363365
do {
364366
roundToHighestFactorOfGlobalSize(ThreadsPerBlock[0], GlobalSize[0]);
365-
} while (!isPowerOf2(ThreadsPerBlock[0]));
367+
} while (!isPowerOf2(ThreadsPerBlock[0]) && ThreadsPerBlock[0] > 32 &&
368+
--ThreadsPerBlock[0]);
366369
}

0 commit comments

Comments
 (0)