Skip to content

Some Unity fixes #187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ elseif (UNIX)
add_definitions(-fvisibility=hidden)
endif (WIN32)

set(CMAKE_DEBUG_POSTFIX D)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

Expand Down Expand Up @@ -65,5 +66,3 @@ if (NOT RR_NO_TESTS)
add_subdirectory(Gtest)
add_subdirectory(UnitTest)
endif (NOT RR_NO_TESTS)


46 changes: 35 additions & 11 deletions RadeonRays/src/accelerator/bvh2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,12 @@ namespace RadeonRays
// Mutex to guard cv
std::mutex mutex;
// Indicates if we need to shutdown all the threads
std::atomic<bool> shutdown;
bool shutdown = false;
// Number of primitives processed so far
std::atomic<std::uint32_t> num_refs_processed;
num_refs_processed.store(0);

num_refs_processed.store(0);
shutdown.store(false);

// Push root request
requests.push(SplitRequest{
scene_min,
scene_max,
Expand All @@ -243,33 +242,42 @@ namespace RadeonRays
0u
});

// Worker build function
auto worker_thread = [&]()
{
// Local stack for requests
thread_local std::stack<SplitRequest> local_requests;

// Thread loop
for (;;)
{
// Wait for signal
{
// Wait on the global stack to receive a request
std::unique_lock<std::mutex> lock(mutex);
cv.wait(lock, [&]() { return !requests.empty() || shutdown; });

// If we have been awakened by shutdown, we need to leave asap
if (shutdown) return;

// Otherwise take a request from global stack and put it
// into our local stack
local_requests.push(requests.top());
requests.pop();
}

// Allocated space for requests
_MM_ALIGN16 SplitRequest request;
_MM_ALIGN16 SplitRequest request_left;
_MM_ALIGN16 SplitRequest request_right;

// Process local requests
// Start handling local stack of requests
while (!local_requests.empty())
{
// Pop next request
request = local_requests.top();
local_requests.pop();

// Handle it
auto node_type = HandleRequest(
request,
aabb_min,
Expand All @@ -281,28 +289,40 @@ namespace RadeonRays
request_left,
request_right);

// If it is a leaf, update number of processed primitives
// and continue
if (node_type == kLeaf)
{
num_refs_processed += static_cast<std::uint32_t>(request.num_refs);
continue;
}

if (request_right.num_refs > 4096u)
// Here we know we have just built an internal node,
// so we are going to handle its left child on this thread and
// its right child on:
// - this thread if it is small
// - another thread if it is huge (since this one is going to handle left child)
if (request_right.num_refs > 2048u)
{
// Put request into the global queue
std::unique_lock<std::mutex> lock(mutex);
requests.push(request_right);
// Wake up one of the workers
cv.notify_one();
}
else
{
// Put small request into the local queue
local_requests.push(request_right);
}

// Put left request to local stack (always handled on this thread)
local_requests.push(request_left);
}
}
};

// Launch several threads
auto num_threads = std::thread::hardware_concurrency();
std::vector<std::thread> threads(num_threads);

Expand All @@ -311,15 +331,19 @@ namespace RadeonRays
threads[i] = std::thread(worker_thread);
}

// Wait until all primitives are handled
while (num_refs_processed != num_aabbs)
{
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}

// Signal shutdown and wake up all the threads
shutdown.store(true);
cv.notify_all();

// Signal shutdown and wake up all the threads
{
std::unique_lock<std::mutex> lock(mutex);
shutdown = true;
cv.notify_all();
}

// Wait for all the threads to finish
for (auto i = 0u; i < num_threads; ++i)
{
Expand Down
37 changes: 25 additions & 12 deletions RadeonRays/src/api/radeon_rays.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ THE SOFTWARE.
#define HANDLE_TYPE HMODULE

#ifndef _DEBUG
#define LIBNAME "Calc64.dll"
#define LONGNAME "../Bin/Release/x64/##LIBNAME"
#define LIBNAME "Calc.dll"
#define LONGNAME "../bin/Release/##LIBNAME"
#else
#define LIBNAME "Calc64D.dll"
#define LONGNAME "../Bin/Debug/x64/##LIBNAME"
#define LIBNAME "CalcD.dll"
#define LONGNAME "../bin/Debug/##LIBNAME"
#endif
#elif __linux__
// Linux
Expand All @@ -66,11 +66,11 @@ THE SOFTWARE.
#define HANDLE_TYPE void*

#ifndef _DEBUG
#define LIBNAME "libCalc64.so"
#define LONGNAME "../Bin/Release/x64/##LIBNAME"
#define LIBNAME "libCalc.so"
#define LONGNAME "../bin/Release/##LIBNAME"
#else
#define LIBNAME "libCalc64D.so"
#define LONGNAME "../Bin/Debug/x64/##LIBNAME"
#define LIBNAME "libCalcD.so"
#define LONGNAME "../bin/Debug/##LIBNAME"
#endif
#else
// MacOS
Expand All @@ -80,11 +80,11 @@ THE SOFTWARE.
#define HANDLE_TYPE void*

#ifndef _DEBUG
#define LIBNAME "libCalc64.dylib"
#define LONGNAME "../Bin/Release/x64/##LIBNAME"
#define LIBNAME "libCalc.dylib"
#define LONGNAME "../bin/Release/##LIBNAME"
#else
#define LIBNAME "libCalc64D.dylib"
#define LONGNAME "../Bin/Debug/x64/##LIBNAME"
#define LIBNAME "libCalcD.dylib"
#define LONGNAME "../bin/Debug/##LIBNAME"
#endif
#endif
#endif
Expand Down Expand Up @@ -287,6 +287,19 @@ namespace RadeonRays
#ifdef USE_OPENCL
RRAPI IntersectionApi* CreateFromOpenClContext(cl_context context, cl_device_id device, cl_command_queue queue)
{
if (s_calc_platform == DeviceInfo::kEmbree)
{
IntersectionApi* api = nullptr;
#ifdef USE_EMBREE
api = IntersectionApi::Create(0);
if (!api)
return nullptr;
EmbreeIntersectionDevice* device = static_cast<EmbreeIntersectionDevice*>(static_cast<IntersectionApiImpl*>(api)->GetDevice());
device->SetCommandQueue(queue);
#endif
return api;
}

auto calc = dynamic_cast<Calc::Calc*>(GetCalcOpenCL());

if (calc)
Expand Down
13 changes: 10 additions & 3 deletions RadeonRays/src/device/embree_intersection_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ namespace RadeonRays
};

EmbreeIntersectionDevice::EmbreeIntersectionDevice()
: m_pool(1)
: m_command_queue(nullptr)
, m_pool(1)
{
m_device = rtcNewDevice(nullptr);
RTCError result = rtcDeviceGetError(m_device);
Expand Down Expand Up @@ -488,12 +489,18 @@ namespace RadeonRays

void EmbreeIntersectionDevice::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const
{
Throw("Not implemented for embree device.");
if (m_command_queue)
clFinish(m_command_queue); // wait for kernels to complete so numrays is available
const EmbreeBuffer* fireNumrays = dynamic_cast<const EmbreeBuffer*>(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer.");
QueryIntersection(rays, std::min(*static_cast<const int*>(fireNumrays->GetData()), maxrays), hits, waitevent, event);
}

void EmbreeIntersectionDevice::QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const
{
Throw("Not implemented for embree device.");
if (m_command_queue)
clFinish(m_command_queue); // wait for kernels to complete so numrays is available
const EmbreeBuffer* fireNumrays = dynamic_cast<const EmbreeBuffer*>(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer.");
QueryOcclusion(rays, std::min(*static_cast<const int*>(fireNumrays->GetData()), maxrays), hits, waitevent, event);
}

void* EmbreeIntersectionDevice::GetBvh() const
Expand Down
16 changes: 13 additions & 3 deletions RadeonRays/src/device/embree_intersection_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ THE SOFTWARE.
#include <embree2/rtcore.h>
#include "../async/thread_pool.h"

#ifdef __APPLE__
#include <OpenCL/OpenCL.h>
#else
#include <CL/cl.h>
#endif

namespace RadeonRays
{
class Mesh;
Expand Down Expand Up @@ -54,7 +60,10 @@ namespace RadeonRays
void QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override;
void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const override;
void* GetBvh() const override;


inline cl_command_queue GetCommandQueue() const { return m_command_queue; }
inline void SetCommandQueue(cl_command_queue command_queue) { m_command_queue = command_queue; }

protected:
RTCScene GetEmbreeMesh(const Mesh*);
void UpdateShape(const ShapeImpl*);
Expand All @@ -66,9 +75,10 @@ namespace RadeonRays

// embree device
RTCDevice m_device;

// Unity hack: OpenCL command queue
cl_command_queue m_command_queue;
// scene for intersection
RTCScene m_scene;
RTCScene m_scene;

//thread pool for parallelizing work with buffers
mutable thread_pool<void> m_pool;
Expand Down
5 changes: 3 additions & 2 deletions UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ endif (RR_USE_EMBREE)
add_executable(UnitTest ${SOURCES})

target_link_libraries(UnitTest PRIVATE GTest RadeonRays Calc)
set_target_properties(UnitTest PROPERTIES
VS_DEBUGGER_WORKING_DIRECTORY ${RadeonRaysSDK_SOURCE_DIR}/RadeonRays)

#Add root for unittests. They use private headers
target_include_directories(UnitTest PRIVATE "${RadeonRaysSDK_SOURCE_DIR}")

Expand All @@ -53,7 +56,6 @@ else (NOT RR_SHARED_CALC)
target_compile_definitions(UnitTest PRIVATE CALC_STATIC_LIBRARY)
endif (RR_SHARED_CALC)


target_compile_features(UnitTest PRIVATE cxx_std_11)
if (APPLE)
target_compile_options(UnitTest PRIVATE -stdlib=libc++)
Expand All @@ -71,7 +73,6 @@ if (RR_USE_VULKAN)
target_compile_definitions(UnitTest PRIVATE USE_VULKAN=1)
endif (RR_USE_VULKAN)


if (RR_ENABLE_RAYMASK)
target_compile_definitions(UnitTest PRIVATE RR_RAY_MASK)
endif (RR_ENABLE_RAYMASK)
5 changes: 5 additions & 0 deletions UnitTest/radeon_rays_apitest_embree.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,13 @@ TEST_F(ApiBackendEmbree, Intersection_1Ray)
}


#ifdef RR_RAY_MASK
// The test creates a single triangle mesh and tests attach/detach functionality
TEST_F(ApiBackendEmbree, Intersection_1Ray_Masked)
#else
// The test creates a single triangle mesh and tests attach/detach functionality
TEST_F(ApiBackendEmbree, DISABLED_Intersection_1Ray_Masked)
#endif
{
Shape* mesh = nullptr;

Expand Down