diff --git a/CLW/CLWProgram.cpp b/CLW/CLWProgram.cpp index 1ab4e86e..159ea1fe 100644 --- a/CLW/CLWProgram.cpp +++ b/CLW/CLWProgram.cpp @@ -46,9 +46,10 @@ static void load_file_contents(std::string const& name, std::vector& conte in.seekg(0, std::ios::beg); - contents.resize(static_cast(fileSize)); + contents.resize(static_cast(fileSize) + 1); in.read(&contents[0], fileSize); + contents[static_cast(fileSize)] = '\0'; } else { diff --git a/RadeonRays/include/radeon_rays.h b/RadeonRays/include/radeon_rays.h index 17d26b16..7bbdc7fa 100644 --- a/RadeonRays/include/radeon_rays.h +++ b/RadeonRays/include/radeon_rays.h @@ -268,6 +268,9 @@ namespace RadeonRays // Find any intersection. // The call is asynchronous. Event pointer mights be nullptrs. virtual void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const = 0; + + + virtual void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koeffs, Buffer const* offset_directions, Buffer const* offset_koeffs, int numorigins, int numdirections, Buffer* hitresults, Event const* waitevent, Event** event) const = 0; // Find closest intersection, number of rays is in remote memory // The call is asynchronous. Event pointers might be nullptrs. @@ -313,4 +316,4 @@ namespace RadeonRays } -#endif // RADEON_RAYS_H \ No newline at end of file +#endif // RADEON_RAYS_H diff --git a/RadeonRays/src/api/radeon_rays_impl.cpp b/RadeonRays/src/api/radeon_rays_impl.cpp index 10b2e787..b23ab10b 100644 --- a/RadeonRays/src/api/radeon_rays_impl.cpp +++ b/RadeonRays/src/api/radeon_rays_impl.cpp @@ -130,6 +130,11 @@ namespace RadeonRays { m_device->QueryOcclusion(rays, numrays, hitresults, waitevent, event); } + + void IntersectionApiImpl::QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koeffs, Buffer const* offset_directions, Buffer const* offset_koeffs, int numorigins, int numdirections, Buffer* hitresults, Event const* waitevent, Event** event) const + { + m_device->QueryOccluded2dSumLinear2(origins, directions, koeffs, offset_directions, offset_koeffs, numorigins, numdirections, hitresults, waitevent, event); + } void IntersectionApiImpl::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const { diff --git a/RadeonRays/src/api/radeon_rays_impl.h b/RadeonRays/src/api/radeon_rays_impl.h index bde5fa04..88de4186 100644 --- a/RadeonRays/src/api/radeon_rays_impl.h +++ b/RadeonRays/src/api/radeon_rays_impl.h @@ -111,6 +111,8 @@ namespace RadeonRays // Find any intersection. // The call is asynchronous. Event pointer mights be nullptrs. void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; + + void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koeffs, Buffer const* offset_directions, Buffer const* offset_koeffs, int numorigins, int numdirections, Buffer* hitresults, Event const* waitevent, Event** event) const override; // Find closest intersection, number of rays is in remote memory // TODO: do we need to modify rays' intersection range? diff --git a/RadeonRays/src/device/calc_intersection_device.cpp b/RadeonRays/src/device/calc_intersection_device.cpp index d13d5a14..c67e3878 100644 --- a/RadeonRays/src/device/calc_intersection_device.cpp +++ b/RadeonRays/src/device/calc_intersection_device.cpp @@ -286,6 +286,38 @@ namespace RadeonRays m_intersector->QueryOcclusion(0, ray_buffer, numrays, hit_buffer, e, nullptr); } } + + + void CalcIntersectionDevice::QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koefs, Buffer const* offset_directions, Buffer const* offset_koefs, int numorigins, int numdirections, Buffer* hits, Event const* waitevent, Event** event) const + { + // Extract Calc buffers from their holders + auto origins_buffer = static_cast(origins)->m_buffer.get(); + auto directions_buffer = static_cast(directions)->m_buffer.get(); + auto koefs_buffer = static_cast(koefs)->m_buffer.get(); + + auto offset_directions_buffer = static_cast(offset_directions)->m_buffer.get(); + auto offset_koefs_buffer = static_cast(offset_koefs)->m_buffer.get(); + + auto hit_buffer = static_cast(hits)->m_buffer.get(); + // If waitevent is passed in we have to extract it as well + auto e = waitevent ? static_cast(waitevent)->m_event.get() : nullptr; + + if (event) + { + // event pointer has been provided, so construct holder and return event to the user + Calc::Event* calc_event = nullptr; + m_intersector->QueryOccluded2dSumLinear2(0, origins_buffer, directions_buffer, koefs_buffer, offset_directions_buffer, offset_koefs_buffer, numorigins, numdirections, hit_buffer, e, &calc_event); + + auto holder = CreateEventHolder(); + holder->Set(m_device.get(), calc_event); + *event = holder; + } + else + { + m_intersector->QueryOccluded2dSumLinear2(0, origins_buffer, directions_buffer, koefs_buffer, offset_directions_buffer, offset_koefs_buffer, numorigins, numdirections, hit_buffer, e, nullptr); + } + } + void CalcIntersectionDevice::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const { diff --git a/RadeonRays/src/device/calc_intersection_device.h b/RadeonRays/src/device/calc_intersection_device.h index bca52bce..6adacdc1 100644 --- a/RadeonRays/src/device/calc_intersection_device.h +++ b/RadeonRays/src/device/calc_intersection_device.h @@ -61,6 +61,7 @@ namespace RadeonRays void QueryIntersection(Buffer const* rays, int numrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override; void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; + void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koefs, Buffer const* offset_directions, Buffer const* offset_koefs, int numorigins, int numdirections, Buffer* hits, Event const* waitevent, Event** event) const override; void QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override; diff --git a/RadeonRays/src/device/intersection_device.h b/RadeonRays/src/device/intersection_device.h index 76b7c97e..60ad163a 100644 --- a/RadeonRays/src/device/intersection_device.h +++ b/RadeonRays/src/device/intersection_device.h @@ -72,6 +72,8 @@ namespace RadeonRays // The call waits until waitevent is resolved (on a target device) if waitevent != nullptr. // The call is non-blocking if event is passed it, otherwise (event == nullptr) it is blocking. virtual void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hits, Event const* waitevent, Event** event) const = 0; + + virtual void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koefs, Buffer const* offset_directions, Buffer const* offset_koefs, int numorigins, int numdirections, Buffer* hits, Event const* waitevent, Event** event) const = 0; // Find intersection for the rays in rays buffer and write them into hits buffer. Take the number of rays from the buffer in remote memory. // rays is assumed AOS with elements of type RadeonRays::ray. diff --git a/RadeonRays/src/intersector/intersector.cpp b/RadeonRays/src/intersector/intersector.cpp index f8f8a726..fde89e68 100644 --- a/RadeonRays/src/intersector/intersector.cpp +++ b/RadeonRays/src/intersector/intersector.cpp @@ -6,7 +6,10 @@ namespace RadeonRays Intersector::Intersector(Calc::Device *device) : m_device(device), m_counter(device->CreateBuffer(sizeof(int), Calc::BufferType::kRead), - [device](Calc::Buffer* buffer) { device->DeleteBuffer(buffer); }) + [device](Calc::Buffer* buffer) { device->DeleteBuffer(buffer); }), + m_counter2(device->CreateBuffer(sizeof(int), Calc::BufferType::kRead), + [device](Calc::Buffer* buffer) { device->DeleteBuffer(buffer); }) + { } @@ -40,6 +43,18 @@ namespace RadeonRays m_device->Finish(0); Occluded(queue_idx, rays, m_counter.get(), num_rays, hits, wait_event, event); } + + void Intersector::QueryOccluded2dSumLinear2(std::uint32_t queue_idx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, Calc::Buffer const *offset_directions, + Calc::Buffer const *offset_koefs, std::uint32_t num_origins, std::uint32_t num_directions, + Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const + { + m_device->WriteBuffer(m_counter.get(), 0, 0, sizeof(num_origins), &num_origins, nullptr); + m_device->WriteBuffer(m_counter2.get(), 0, 0, sizeof(num_directions), &num_directions, nullptr); + m_device->Finish(0); + + int num_rays = num_origins * num_directions; + Occluded2dSumLinear2(queue_idx, origins, directions, koefs, offset_directions, offset_koefs, m_counter.get(), m_counter2.get(), num_rays, hits, wait_event, event); + } void Intersector::QueryIntersection(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays, std::uint32_t max_rays, Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const diff --git a/RadeonRays/src/intersector/intersector.h b/RadeonRays/src/intersector/intersector.h index 1e613197..d360dbd4 100644 --- a/RadeonRays/src/intersector/intersector.h +++ b/RadeonRays/src/intersector/intersector.h @@ -102,6 +102,10 @@ namespace RadeonRays */ void QueryOcclusion(std::uint32_t queue_idx, Calc::Buffer const* rays, std::uint32_t num_rays, Calc::Buffer* hits, Calc::Event const* wait_event, Calc::Event** event) const; + + void QueryOccluded2dSumLinear2(std::uint32_t queue_idx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, Calc::Buffer const *offset_directions, + Calc::Buffer const *offset_koefs, std::uint32_t num_origins, std::uint32_t num_directions, + Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const; /** \brief Query intersection for a batch of rays @@ -152,12 +156,19 @@ namespace RadeonRays virtual void Occluded(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays, std::uint32_t max_rays, Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const = 0; + + virtual void Occluded2dSumLinear2(std::uint32_t queueidx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, + Calc::Buffer const *offset_directions, Calc::Buffer const *offset_koefs, + Calc::Buffer const *num_origins, Calc::Buffer const *num_directions, + std::uint32_t maxrays, Calc::Buffer *hits, + Calc::Event const *wait_event, Calc::Event **event) const {} protected: // Device to use Calc::Device* m_device; // Buffer holding ray count std::unique_ptr> m_counter; + std::unique_ptr> m_counter2; }; } diff --git a/RadeonRays/src/intersector/intersector_skip_links.cpp b/RadeonRays/src/intersector/intersector_skip_links.cpp index 1021f500..820ce8ba 100644 --- a/RadeonRays/src/intersector/intersector_skip_links.cpp +++ b/RadeonRays/src/intersector/intersector_skip_links.cpp @@ -52,6 +52,7 @@ namespace RadeonRays Calc::Executable* executable; Calc::Function* isect_func; Calc::Function* occlude_func; + Calc::Function* occlude_func2d_sum_linear; GpuData(Calc::Device* d) : device(d) @@ -71,6 +72,7 @@ namespace RadeonRays { executable->DeleteFunction(isect_func); executable->DeleteFunction(occlude_func); + executable->DeleteFunction(occlude_func2d_sum_linear); device->DeleteExecutable(executable); } } @@ -128,6 +130,7 @@ namespace RadeonRays m_gpudata->isect_func = m_gpudata->executable->CreateFunction("intersect_main"); m_gpudata->occlude_func = m_gpudata->executable->CreateFunction("occluded_main"); + m_gpudata->occlude_func2d_sum_linear = m_gpudata->executable->CreateFunction("occluded_main_2d_sum_linear"); } void IntersectorSkipLinks::Process(World const& world) @@ -446,5 +449,33 @@ namespace RadeonRays m_device->Execute(func, queueidx, globalsize, localsize, event); } + + void IntersectorSkipLinks::Occluded2dSumLinear2(std::uint32_t queueidx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, + Calc::Buffer const *offset_directions, Calc::Buffer const *offset_koefs, + Calc::Buffer const *num_origins, Calc::Buffer const *num_directions, + std::uint32_t maxrays, Calc::Buffer *hits, + Calc::Event const *wait_event, Calc::Event **event) const { + auto& func = m_gpudata->occlude_func2d_sum_linear; + + // Set args + int arg = 0; + + func->SetArg(arg++, m_gpudata->bvh); + func->SetArg(arg++, m_gpudata->vertices); + func->SetArg(arg++, m_gpudata->faces); + func->SetArg(arg++, origins); + func->SetArg(arg++, directions); + func->SetArg(arg++, koefs); + func->SetArg(arg++, offset_directions); + func->SetArg(arg++, offset_koefs); + func->SetArg(arg++, num_origins); + func->SetArg(arg++, num_directions); + func->SetArg(arg++, hits); + + size_t localsize = kWorkGroupSize; + size_t globalsize = ((maxrays + kWorkGroupSize - 1) / kWorkGroupSize) * kWorkGroupSize; + + m_device->Execute(func, queueidx, globalsize, localsize, event); + } } diff --git a/RadeonRays/src/intersector/intersector_skip_links.h b/RadeonRays/src/intersector/intersector_skip_links.h index 3d5b1bcb..35271e1d 100644 --- a/RadeonRays/src/intersector/intersector_skip_links.h +++ b/RadeonRays/src/intersector/intersector_skip_links.h @@ -88,6 +88,13 @@ namespace RadeonRays void Occluded(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays, std::uint32_t max_rays, Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const override; + + // Occulusion2d implementation + void Occluded2dSumLinear2(std::uint32_t queueidx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, + Calc::Buffer const *offset_directions, Calc::Buffer const *offset_koefs, + Calc::Buffer const *num_origins, Calc::Buffer const *num_directions, + std::uint32_t maxrays, Calc::Buffer *hits, + Calc::Event const *wait_event, Calc::Event **event) const override; private: struct GpuData; diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl b/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl index e00c94f1..25436b2c 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl @@ -288,4 +288,118 @@ void occluded_main( hits[global_id] = MISS_MARKER; } } -} \ No newline at end of file +} + + +__attribute__((reqd_work_group_size(64, 1, 1))) +KERNEL +void occluded_main_2d_sum_linear( +// BVH nodes +GLOBAL bvh_node const* restrict nodes, +// Triangle vertices +GLOBAL float3 const* restrict vertices, +// Triangle indices +GLOBAL Face const* restrict faces, + +// Rays +GLOBAL float4 const* restrict origins, +GLOBAL float4 const* restrict directions, +GLOBAL float4 const* restrict koefs, + +GLOBAL int const* restrict offset_directions, +GLOBAL int const* restrict offset_koefs, + +// Number of origins and directions +GLOBAL int const* restrict num_origins, +GLOBAL int const* restrict num_directions, +// Hit data +GLOBAL float4* hits +) +{ + int num_rays = (*num_origins) * (*num_directions); + + int global_id = get_global_id(0); + + int origin_id = global_id % (*num_origins); + int direction_id = (int)(global_id / (*num_origins)); + + // Handle only working subset + if (global_id < num_rays) + { + const int direction_offset = offset_directions[origin_id]; + const int koefs_offset = offset_koefs[origin_id]; + + const float4 koef = koefs[direction_id + koefs_offset]; + + // Create ray + ray r; + r.o = origins[origin_id]; + r.d = directions[direction_id + direction_offset]; + r.extra.x = -1; + r.extra.y = 1; + r.doBackfaceCulling = 0; + r.padding = 1; + + { + // Precompute inverse direction and origin / dir for bbox testing + float3 const invdir = safe_invdir(r); + float3 const oxinvdir = -r.o.xyz * invdir; + // Intersection parametric distance + float t_max = r.o.w; + + // Current node address + int addr = 0; + + while (addr != INVALID_IDX) + { + // Fetch next node + bvh_node node = nodes[addr]; + // Intersect against bbox + float2 s = fast_intersect_bbox1(node, invdir, oxinvdir, t_max); + + if (s.x <= s.y) + { + // Check if the node is a leaf + if (LEAFNODE(node)) + { + int const face_idx = STARTIDX(node); + Face const face = faces[face_idx]; + #ifdef RR_RAY_MASK + if (ray_get_mask(&r) != face.shape_id) + { + #endif // RR_RAY_MASK + float3 const v1 = vertices[face.idx[0]]; + float3 const v2 = vertices[face.idx[1]]; + float3 const v3 = vertices[face.idx[2]]; + + // Intersect triangle + float const f = fast_intersect_triangle(r, v1, v2, v3, t_max); + // If hit store the result and bail out + if (f < t_max) + { + hits[origin_id].x += koef.x; + hits[origin_id].y += koef.z; + return; + } + #ifdef RR_RAY_MASK + } + #endif // RR_RAY_MASK + } + else + { + // Move to next node otherwise. + // Left child is always at addr + 1 + ++addr; + continue; + } + } + + addr = NEXT(node); + } + + // Finished traversal, but no intersection found + hits[origin_id].x += koef.y; + hits[origin_id].y += koef.w; + } + } +}