@@ -225,13 +225,12 @@ namespace RadeonRays
225
225
// Mutex to guard cv
226
226
std::mutex mutex;
227
227
// Indicates if we need to shutdown all the threads
228
- std::atomic< bool > shutdown;
228
+ bool shutdown = false ;
229
229
// Number of primitives processed so far
230
230
std::atomic<std::uint32_t > num_refs_processed;
231
+ num_refs_processed.store (0 );
231
232
232
- num_refs_processed.store (0 );
233
- shutdown.store (false );
234
-
233
+ // Push root request
235
234
requests.push (SplitRequest{
236
235
scene_min,
237
236
scene_max,
@@ -243,33 +242,42 @@ namespace RadeonRays
243
242
0u
244
243
});
245
244
245
+ // Worker build function
246
246
auto worker_thread = [&]()
247
247
{
248
+ // Local stack for requests
248
249
thread_local std::stack<SplitRequest> local_requests;
249
250
251
+ // Thread loop
250
252
for (;;)
251
253
{
252
254
// Wait for signal
253
255
{
256
+ // Wait on the global stack to receive a request
254
257
std::unique_lock<std::mutex> lock (mutex);
255
258
cv.wait (lock, [&]() { return !requests.empty () || shutdown; });
256
259
260
+ // If we have been woken up by shutdown, we need to leave asap
257
261
if (shutdown) return ;
258
-
262
+ // Otherwise take a request from global stack and put it
263
+ // into our local stack
259
264
local_requests.push (requests.top ());
260
265
requests.pop ();
261
266
}
262
267
268
+ // Allocated space for requests
263
269
_MM_ALIGN16 SplitRequest request;
264
270
_MM_ALIGN16 SplitRequest request_left;
265
271
_MM_ALIGN16 SplitRequest request_right;
266
272
267
- // Process local requests
273
+ // Start handling local stack of requests
268
274
while (!local_requests.empty ())
269
275
{
276
+ // Pop next request
270
277
request = local_requests.top ();
271
278
local_requests.pop ();
272
279
280
+ // Handle it
273
281
auto node_type = HandleRequest (
274
282
request,
275
283
aabb_min,
@@ -281,28 +289,40 @@ namespace RadeonRays
281
289
request_left,
282
290
request_right);
283
291
292
+ // If it is a leaf, update number of processed primitives
293
+ // and continue
284
294
if (node_type == kLeaf )
285
295
{
286
296
num_refs_processed += static_cast <std::uint32_t >(request.num_refs );
287
297
continue ;
288
298
}
289
299
290
- if (request_right.num_refs > 4096u )
300
+ // Here we know we have just built an internal node,
301
+ // so we are going to handle its left child on this thread and
302
+ // its right child on:
303
+ // - this thread if it is small
304
+ // - another thread if it is huge (since this one is going to handle left child)
305
+ if (request_right.num_refs > 2048u )
291
306
{
307
+ // Put request into the global queue
292
308
std::unique_lock<std::mutex> lock (mutex);
293
309
requests.push (request_right);
310
+ // Wake up one of the workers
294
311
cv.notify_one ();
295
312
}
296
313
else
297
314
{
315
+ // Put small request into the local queue
298
316
local_requests.push (request_right);
299
317
}
300
318
319
+ // Put left request to local stack (always handled on this thread)
301
320
local_requests.push (request_left);
302
321
}
303
322
}
304
323
};
305
324
325
+ // Launch several threads
306
326
auto num_threads = std::thread::hardware_concurrency ();
307
327
std::vector<std::thread> threads (num_threads);
308
328
@@ -311,15 +331,19 @@ namespace RadeonRays
311
331
threads[i] = std::thread (worker_thread);
312
332
}
313
333
334
+ // Wait until all primitives are handled
314
335
while (num_refs_processed != num_aabbs)
315
336
{
316
337
std::this_thread::sleep_for (std::chrono::milliseconds (20 ));
317
338
}
318
339
319
- // Signal shutdown and wake up all the threads
320
- shutdown.store (true );
321
- cv.notify_all ();
322
-
340
+ // Signal shutdown and wake up all the threads
341
+ {
342
+ std::unique_lock<std::mutex> lock (mutex);
343
+ shutdown = true ;
344
+ cv.notify_all ();
345
+ }
346
+
323
347
// Wait for all the threads to finish
324
348
for (auto i = 0u ; i < num_threads; ++i)
325
349
{
0 commit comments