diff --git a/CMakeLists.txt b/CMakeLists.txt index fe91c6aa..1902fc54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ elseif (UNIX) add_definitions(-fvisibility=hidden) endif (WIN32) +set(CMAKE_DEBUG_POSTFIX D) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) @@ -65,5 +66,3 @@ if (NOT RR_NO_TESTS) add_subdirectory(Gtest) add_subdirectory(UnitTest) endif (NOT RR_NO_TESTS) - - diff --git a/RadeonRays/src/accelerator/bvh2.cpp b/RadeonRays/src/accelerator/bvh2.cpp index 7770b956..bc645faa 100644 --- a/RadeonRays/src/accelerator/bvh2.cpp +++ b/RadeonRays/src/accelerator/bvh2.cpp @@ -225,13 +225,12 @@ namespace RadeonRays // Mutex to guard cv std::mutex mutex; // Indicates if we need to shutdown all the threads - std::atomic shutdown; + bool shutdown = false; // Number of primitives processed so far std::atomic num_refs_processed; + num_refs_processed.store(0); - num_refs_processed.store(0); - shutdown.store(false); - + // Push root request requests.push(SplitRequest{ scene_min, scene_max, @@ -243,33 +242,42 @@ namespace RadeonRays 0u }); + // Worker build function auto worker_thread = [&]() { + // Local stack for requests thread_local std::stack local_requests; + // Thread loop for (;;) { // Wait for signal { + // Wait on the global stack to receive a request std::unique_lock lock(mutex); cv.wait(lock, [&]() { return !requests.empty() || shutdown; }); + // If we have been awaken by shutdown, we need to leave asap if (shutdown) return; - + // Otherwise take a request from global stack and put it + // into our local stack local_requests.push(requests.top()); requests.pop(); } + // Allocated space for requests _MM_ALIGN16 SplitRequest request; _MM_ALIGN16 SplitRequest request_left; _MM_ALIGN16 SplitRequest request_right; - // Process local requests + // Start handling local stack of requests while (!local_requests.empty()) { + // Pop next request request = local_requests.top(); local_requests.pop(); + // Handle it auto node_type = HandleRequest( request, aabb_min, @@ -281,28 +289,40 @@ namespace RadeonRays request_left, request_right); + // If it is a leaf, update number of processed primitives + // and continue if (node_type == kLeaf) { num_refs_processed += static_cast(request.num_refs); continue; } - if (request_right.num_refs > 4096u) + // Here we know we have just built and internal node, + // so we are going to handle its left child on this thread and + // its right child on: + // - this thread if it is small + // - another thread if it is huge (since this one is going to handle left child) + if (request_right.num_refs > 2048u) { + // Put request into the global queue std::unique_lock lock(mutex); requests.push(request_right); + // Wake up one of the workers cv.notify_one(); } else { + // Put small request into the local queue local_requests.push(request_right); } + // Put left request to local stack (always handled on this thread) local_requests.push(request_left); } } }; + // Launch several threads auto num_threads = std::thread::hardware_concurrency(); std::vector threads(num_threads); @@ -311,15 +331,19 @@ namespace RadeonRays threads[i] = std::thread(worker_thread); } + // Wait until all primitives are handled while (num_refs_processed != num_aabbs) { std::this_thread::sleep_for(std::chrono::milliseconds(20)); } - // Signal shutdown and wake up all the threads - shutdown.store(true); - cv.notify_all(); - + // Signal shutdown and wake up all the threads + { + std::unique_lock lock(mutex); + shutdown = true; + cv.notify_all(); + } + // Wait for all the threads to finish for (auto i = 0u; i < num_threads; ++i) { diff --git a/RadeonRays/src/api/radeon_rays.cpp b/RadeonRays/src/api/radeon_rays.cpp index 199a2e6d..c1664fa6 100644 --- a/RadeonRays/src/api/radeon_rays.cpp +++ b/RadeonRays/src/api/radeon_rays.cpp @@ -52,11 +52,11 @@ THE SOFTWARE. #define HANDLE_TYPE HMODULE #ifndef _DEBUG -#define LIBNAME "Calc64.dll" -#define LONGNAME "../Bin/Release/x64/##LIBNAME" +#define LIBNAME "Calc.dll" +#define LONGNAME "../bin/Release/##LIBNAME" #else -#define LIBNAME "Calc64D.dll" -#define LONGNAME "../Bin/Debug/x64/##LIBNAME" +#define LIBNAME "CalcD.dll" +#define LONGNAME "../bin/Debug/##LIBNAME" #endif #elif __linux__ // Linux @@ -66,11 +66,11 @@ THE SOFTWARE. #define HANDLE_TYPE void* #ifndef _DEBUG -#define LIBNAME "libCalc64.so" -#define LONGNAME "../Bin/Release/x64/##LIBNAME" +#define LIBNAME "libCalc.so" +#define LONGNAME "../bin/Release/##LIBNAME" #else -#define LIBNAME "libCalc64D.so" -#define LONGNAME "../Bin/Debug/x64/##LIBNAME" +#define LIBNAME "libCalcD.so" +#define LONGNAME "../bin/Debug/##LIBNAME" #endif #else // MacOS @@ -80,11 +80,11 @@ THE SOFTWARE. #define HANDLE_TYPE void* #ifndef _DEBUG -#define LIBNAME "libCalc64.dylib" -#define LONGNAME "../Bin/Release/x64/##LIBNAME" +#define LIBNAME "libCalc.dylib" +#define LONGNAME "../bin/Release/##LIBNAME" #else -#define LIBNAME "libCalc64D.dylib" -#define LONGNAME "../Bin/Debug/x64/##LIBNAME" +#define LIBNAME "libCalcD.dylib" +#define LONGNAME "../bin/Debug/##LIBNAME" #endif #endif #endif @@ -287,6 +287,19 @@ namespace RadeonRays #ifdef USE_OPENCL RRAPI IntersectionApi* CreateFromOpenClContext(cl_context context, cl_device_id device, cl_command_queue queue) { + if (s_calc_platform == DeviceInfo::kEmbree) + { + IntersectionApi* api = nullptr; +#ifdef USE_EMBREE + api = IntersectionApi::Create(0); + if (!api) + return nullptr; + EmbreeIntersectionDevice* device = static_cast(static_cast(api)->GetDevice()); + device->SetCommandQueue(queue); +#endif + return api; + } + auto calc = dynamic_cast(GetCalcOpenCL()); if (calc) diff --git a/RadeonRays/src/device/embree_intersection_device.cpp b/RadeonRays/src/device/embree_intersection_device.cpp index f28ee451..90aaf6f3 100644 --- a/RadeonRays/src/device/embree_intersection_device.cpp +++ b/RadeonRays/src/device/embree_intersection_device.cpp @@ -109,7 +109,8 @@ namespace RadeonRays }; EmbreeIntersectionDevice::EmbreeIntersectionDevice() - : m_pool(1) + : m_command_queue(nullptr) + , m_pool(1) { m_device = rtcNewDevice(nullptr); RTCError result = rtcDeviceGetError(m_device); @@ -488,12 +489,18 @@ namespace RadeonRays void EmbreeIntersectionDevice::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const { - Throw("Not implemented for embree device."); + if (m_command_queue) + clFinish(m_command_queue); // wait for kernels to complete so numrays is available + const EmbreeBuffer* fireNumrays = dynamic_cast(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer."); + QueryIntersection(rays, std::min(*static_cast(fireNumrays->GetData()), maxrays), hits, waitevent, event); } void EmbreeIntersectionDevice::QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const { - Throw("Not implemented for embree device."); + if (m_command_queue) + clFinish(m_command_queue); // wait for kernels to complete so numrays is available + const EmbreeBuffer* fireNumrays = dynamic_cast(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer."); + QueryOcclusion(rays, std::min(*static_cast(fireNumrays->GetData()), maxrays), hits, waitevent, event); } void* EmbreeIntersectionDevice::GetBvh() const diff --git a/RadeonRays/src/device/embree_intersection_device.h b/RadeonRays/src/device/embree_intersection_device.h index e7d0f18b..20f55b57 100644 --- a/RadeonRays/src/device/embree_intersection_device.h +++ b/RadeonRays/src/device/embree_intersection_device.h @@ -27,6 +27,12 @@ THE SOFTWARE. #include #include "../async/thread_pool.h" +#ifdef __APPLE__ +#include +#else +#include +#endif + namespace RadeonRays { class Mesh; @@ -54,7 +60,10 @@ namespace RadeonRays void QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override; void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; void* GetBvh() const override; - + + inline cl_command_queue GetCommandQueue() const { return m_command_queue; } + inline void SetCommandQueue(cl_command_queue command_queue) { m_command_queue = command_queue; } + protected: RTCScene GetEmbreeMesh(const Mesh*); void UpdateShape(const ShapeImpl*); @@ -66,9 +75,10 @@ namespace RadeonRays // embree device RTCDevice m_device; - + // Unity hack: OpenCL command queue + cl_command_queue m_command_queue; // scene for intersection - RTCScene m_scene; + RTCScene m_scene; //thread pool for parallelizing work with buffers mutable thread_pool m_pool; diff --git a/UnitTest/CMakeLists.txt b/UnitTest/CMakeLists.txt index 76e179e0..6a1d0d1e 100644 --- a/UnitTest/CMakeLists.txt +++ b/UnitTest/CMakeLists.txt @@ -44,6 +44,9 @@ endif (RR_USE_EMBREE) add_executable(UnitTest ${SOURCES}) target_link_libraries(UnitTest PRIVATE GTest RadeonRays Calc) +set_target_properties(UnitTest PROPERTIES + VS_DEBUGGER_WORKING_DIRECTORY ${RadeonRaysSDK_SOURCE_DIR}/RadeonRays) + #Add root for unittests. They use private headers target_include_directories(UnitTest PRIVATE "${RadeonRaysSDK_SOURCE_DIR}") @@ -53,7 +56,6 @@ else (NOT RR_SHARED_CALC) target_compile_definitions(UnitTest PRIVATE CALC_STATIC_LIBRARY) endif (RR_SHARED_CALC) - target_compile_features(UnitTest PRIVATE cxx_std_11) if (APPLE) target_compile_options(UnitTest PRIVATE -stdlib=libc++) @@ -71,7 +73,6 @@ if (RR_USE_VULKAN) target_compile_definitions(UnitTest PRIVATE USE_VULKAN=1) endif (RR_USE_VULKAN) - if (RR_ENABLE_RAYMASK) target_compile_definitions(UnitTest PRIVATE RR_RAY_MASK) endif (RR_ENABLE_RAYMASK) diff --git a/UnitTest/radeon_rays_apitest_embree.h b/UnitTest/radeon_rays_apitest_embree.h index f335e097..505d767a 100644 --- a/UnitTest/radeon_rays_apitest_embree.h +++ b/UnitTest/radeon_rays_apitest_embree.h @@ -268,8 +268,13 @@ TEST_F(ApiBackendEmbree, Intersection_1Ray) } +#ifdef RR_RAY_MASK // The test creates a single triangle mesh and tests attach/detach functionality TEST_F(ApiBackendEmbree, Intersection_1Ray_Masked) +#else +// The test creates a single triangle mesh and tests attach/detach functionality +TEST_F(ApiBackendEmbree, DISABLED_Intersection_1Ray_Masked) +#endif { Shape* mesh = nullptr;