Skip to content

Commit b59049a

Browse files
authored
Merge pull request #187 from gboisse/expose_bvh
Some Unity fixes
2 parents 8aa95c1 + d4d3ee8 commit b59049a

File tree

7 files changed

+92
-33
lines changed

7 files changed

+92
-33
lines changed

CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ elseif (UNIX)
2222
add_definitions(-fvisibility=hidden)
2323
endif (WIN32)
2424

25+
set(CMAKE_DEBUG_POSTFIX D)
2526
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
2627
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
2728

@@ -65,5 +66,3 @@ if (NOT RR_NO_TESTS)
6566
add_subdirectory(Gtest)
6667
add_subdirectory(UnitTest)
6768
endif (NOT RR_NO_TESTS)
68-
69-

RadeonRays/src/accelerator/bvh2.cpp

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,12 @@ namespace RadeonRays
225225
// Mutex to guard cv
226226
std::mutex mutex;
227227
// Indicates if we need to shutdown all the threads
228-
std::atomic<bool> shutdown;
228+
bool shutdown = false;
229229
// Number of primitives processed so far
230230
std::atomic<std::uint32_t> num_refs_processed;
231+
num_refs_processed.store(0);
231232

232-
num_refs_processed.store(0);
233-
shutdown.store(false);
234-
233+
// Push root request
235234
requests.push(SplitRequest{
236235
scene_min,
237236
scene_max,
@@ -243,33 +242,42 @@ namespace RadeonRays
243242
0u
244243
});
245244

245+
// Worker build function
246246
auto worker_thread = [&]()
247247
{
248+
// Local stack for requests
248249
thread_local std::stack<SplitRequest> local_requests;
249250

251+
// Thread loop
250252
for (;;)
251253
{
252254
// Wait for signal
253255
{
256+
// Wait on the global stack to receive a request
254257
std::unique_lock<std::mutex> lock(mutex);
255258
cv.wait(lock, [&]() { return !requests.empty() || shutdown; });
256259

260+
// If we have been awakened by shutdown, we need to leave asap
257261
if (shutdown) return;
258-
262+
// Otherwise take a request from global stack and put it
263+
// into our local stack
259264
local_requests.push(requests.top());
260265
requests.pop();
261266
}
262267

268+
// Allocated space for requests
263269
_MM_ALIGN16 SplitRequest request;
264270
_MM_ALIGN16 SplitRequest request_left;
265271
_MM_ALIGN16 SplitRequest request_right;
266272

267-
// Process local requests
273+
// Start handling local stack of requests
268274
while (!local_requests.empty())
269275
{
276+
// Pop next request
270277
request = local_requests.top();
271278
local_requests.pop();
272279

280+
// Handle it
273281
auto node_type = HandleRequest(
274282
request,
275283
aabb_min,
@@ -281,28 +289,40 @@ namespace RadeonRays
281289
request_left,
282290
request_right);
283291

292+
// If it is a leaf, update number of processed primitives
293+
// and continue
284294
if (node_type == kLeaf)
285295
{
286296
num_refs_processed += static_cast<std::uint32_t>(request.num_refs);
287297
continue;
288298
}
289299

290-
if (request_right.num_refs > 4096u)
300+
// Here we know we have just built an internal node,
301+
// so we are going to handle its left child on this thread and
302+
// its right child on:
303+
// - this thread if it is small
304+
// - another thread if it is huge (since this one is going to handle left child)
305+
if (request_right.num_refs > 2048u)
291306
{
307+
// Put request into the global queue
292308
std::unique_lock<std::mutex> lock(mutex);
293309
requests.push(request_right);
310+
// Wake up one of the workers
294311
cv.notify_one();
295312
}
296313
else
297314
{
315+
// Put small request into the local queue
298316
local_requests.push(request_right);
299317
}
300318

319+
// Put left request to local stack (always handled on this thread)
301320
local_requests.push(request_left);
302321
}
303322
}
304323
};
305324

325+
// Launch several threads
306326
auto num_threads = std::thread::hardware_concurrency();
307327
std::vector<std::thread> threads(num_threads);
308328

@@ -311,15 +331,19 @@ namespace RadeonRays
311331
threads[i] = std::thread(worker_thread);
312332
}
313333

334+
// Wait until all primitives are handled
314335
while (num_refs_processed != num_aabbs)
315336
{
316337
std::this_thread::sleep_for(std::chrono::milliseconds(20));
317338
}
318339

319-
// Signal shutdown and wake up all the threads
320-
shutdown.store(true);
321-
cv.notify_all();
322-
340+
// Signal shutdown and wake up all the threads
341+
{
342+
std::unique_lock<std::mutex> lock(mutex);
343+
shutdown = true;
344+
cv.notify_all();
345+
}
346+
323347
// Wait for all the threads to finish
324348
for (auto i = 0u; i < num_threads; ++i)
325349
{

RadeonRays/src/api/radeon_rays.cpp

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,11 @@ THE SOFTWARE.
5252
#define HANDLE_TYPE HMODULE
5353

5454
#ifndef _DEBUG
55-
#define LIBNAME "Calc64.dll"
56-
#define LONGNAME "../Bin/Release/x64/##LIBNAME"
55+
#define LIBNAME "Calc.dll"
56+
#define LONGNAME "../bin/Release/##LIBNAME"
5757
#else
58-
#define LIBNAME "Calc64D.dll"
59-
#define LONGNAME "../Bin/Debug/x64/##LIBNAME"
58+
#define LIBNAME "CalcD.dll"
59+
#define LONGNAME "../bin/Debug/##LIBNAME"
6060
#endif
6161
#elif __linux__
6262
// Linux
@@ -66,11 +66,11 @@ THE SOFTWARE.
6666
#define HANDLE_TYPE void*
6767

6868
#ifndef _DEBUG
69-
#define LIBNAME "libCalc64.so"
70-
#define LONGNAME "../Bin/Release/x64/##LIBNAME"
69+
#define LIBNAME "libCalc.so"
70+
#define LONGNAME "../bin/Release/##LIBNAME"
7171
#else
72-
#define LIBNAME "libCalc64D.so"
73-
#define LONGNAME "../Bin/Debug/x64/##LIBNAME"
72+
#define LIBNAME "libCalcD.so"
73+
#define LONGNAME "../bin/Debug/##LIBNAME"
7474
#endif
7575
#else
7676
// MacOS
@@ -80,11 +80,11 @@ THE SOFTWARE.
8080
#define HANDLE_TYPE void*
8181

8282
#ifndef _DEBUG
83-
#define LIBNAME "libCalc64.dylib"
84-
#define LONGNAME "../Bin/Release/x64/##LIBNAME"
83+
#define LIBNAME "libCalc.dylib"
84+
#define LONGNAME "../bin/Release/##LIBNAME"
8585
#else
86-
#define LIBNAME "libCalc64D.dylib"
87-
#define LONGNAME "../Bin/Debug/x64/##LIBNAME"
86+
#define LIBNAME "libCalcD.dylib"
87+
#define LONGNAME "../bin/Debug/##LIBNAME"
8888
#endif
8989
#endif
9090
#endif
@@ -287,6 +287,19 @@ namespace RadeonRays
287287
#ifdef USE_OPENCL
288288
RRAPI IntersectionApi* CreateFromOpenClContext(cl_context context, cl_device_id device, cl_command_queue queue)
289289
{
290+
if (s_calc_platform == DeviceInfo::kEmbree)
291+
{
292+
IntersectionApi* api = nullptr;
293+
#ifdef USE_EMBREE
294+
api = IntersectionApi::Create(0);
295+
if (!api)
296+
return nullptr;
297+
EmbreeIntersectionDevice* device = static_cast<EmbreeIntersectionDevice*>(static_cast<IntersectionApiImpl*>(api)->GetDevice());
298+
device->SetCommandQueue(queue);
299+
#endif
300+
return api;
301+
}
302+
290303
auto calc = dynamic_cast<Calc::Calc*>(GetCalcOpenCL());
291304

292305
if (calc)

RadeonRays/src/device/embree_intersection_device.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,8 @@ namespace RadeonRays
109109
};
110110

111111
EmbreeIntersectionDevice::EmbreeIntersectionDevice()
112-
: m_pool(1)
112+
: m_command_queue(nullptr)
113+
, m_pool(1)
113114
{
114115
m_device = rtcNewDevice(nullptr);
115116
RTCError result = rtcDeviceGetError(m_device);
@@ -488,12 +489,18 @@ namespace RadeonRays
488489

489490
void EmbreeIntersectionDevice::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const
490491
{
491-
Throw("Not implemented for embree device.");
492+
if (m_command_queue)
493+
clFinish(m_command_queue); // wait for kernels to complete so numrays is available
494+
const EmbreeBuffer* fireNumrays = dynamic_cast<const EmbreeBuffer*>(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer.");
495+
QueryIntersection(rays, std::min(*static_cast<const int*>(fireNumrays->GetData()), maxrays), hits, waitevent, event);
492496
}
493497

494498
void EmbreeIntersectionDevice::QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const
495499
{
496-
Throw("Not implemented for embree device.");
500+
if (m_command_queue)
501+
clFinish(m_command_queue); // wait for kernels to complete so numrays is available
502+
const EmbreeBuffer* fireNumrays = dynamic_cast<const EmbreeBuffer*>(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer.");
503+
QueryOcclusion(rays, std::min(*static_cast<const int*>(fireNumrays->GetData()), maxrays), hits, waitevent, event);
497504
}
498505

499506
void* EmbreeIntersectionDevice::GetBvh() const

RadeonRays/src/device/embree_intersection_device.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ THE SOFTWARE.
2727
#include <embree2/rtcore.h>
2828
#include "../async/thread_pool.h"
2929

30+
#ifdef __APPLE__
31+
#include <OpenCL/OpenCL.h>
32+
#else
33+
#include <CL/cl.h>
34+
#endif
35+
3036
namespace RadeonRays
3137
{
3238
class Mesh;
@@ -54,7 +60,10 @@ namespace RadeonRays
5460
void QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override;
5561
void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const override;
5662
void* GetBvh() const override;
57-
63+
64+
inline cl_command_queue GetCommandQueue() const { return m_command_queue; }
65+
inline void SetCommandQueue(cl_command_queue command_queue) { m_command_queue = command_queue; }
66+
5867
protected:
5968
RTCScene GetEmbreeMesh(const Mesh*);
6069
void UpdateShape(const ShapeImpl*);
@@ -66,9 +75,10 @@ namespace RadeonRays
6675

6776
// embree device
6877
RTCDevice m_device;
69-
78+
// Unity hack: OpenCL command queue
79+
cl_command_queue m_command_queue;
7080
// scene for intersection
71-
RTCScene m_scene;
81+
RTCScene m_scene;
7282

7383
//thread pool for parallelizing work with buffers
7484
mutable thread_pool<void> m_pool;

UnitTest/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ endif (RR_USE_EMBREE)
4444
add_executable(UnitTest ${SOURCES})
4545

4646
target_link_libraries(UnitTest PRIVATE GTest RadeonRays Calc)
47+
set_target_properties(UnitTest PROPERTIES
48+
VS_DEBUGGER_WORKING_DIRECTORY ${RadeonRaysSDK_SOURCE_DIR}/RadeonRays)
49+
4750
#Add root for unittests. They use private headers
4851
target_include_directories(UnitTest PRIVATE "${RadeonRaysSDK_SOURCE_DIR}")
4952

@@ -53,7 +56,6 @@ else (NOT RR_SHARED_CALC)
5356
target_compile_definitions(UnitTest PRIVATE CALC_STATIC_LIBRARY)
5457
endif (RR_SHARED_CALC)
5558

56-
5759
target_compile_features(UnitTest PRIVATE cxx_std_11)
5860
if (APPLE)
5961
target_compile_options(UnitTest PRIVATE -stdlib=libc++)
@@ -71,7 +73,6 @@ if (RR_USE_VULKAN)
7173
target_compile_definitions(UnitTest PRIVATE USE_VULKAN=1)
7274
endif (RR_USE_VULKAN)
7375

74-
7576
if (RR_ENABLE_RAYMASK)
7677
target_compile_definitions(UnitTest PRIVATE RR_RAY_MASK)
7778
endif (RR_ENABLE_RAYMASK)

UnitTest/radeon_rays_apitest_embree.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,13 @@ TEST_F(ApiBackendEmbree, Intersection_1Ray)
268268
}
269269

270270

271+
#ifdef RR_RAY_MASK
271272
// The test creates a single triangle mesh and tests attach/detach functionality
272273
TEST_F(ApiBackendEmbree, Intersection_1Ray_Masked)
274+
#else
275+
// The test creates a single triangle mesh and tests attach/detach functionality
276+
TEST_F(ApiBackendEmbree, DISABLED_Intersection_1Ray_Masked)
277+
#endif
273278
{
274279
Shape* mesh = nullptr;
275280

0 commit comments

Comments
 (0)