Skip to content

Kernel modification draft #190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CLW/CLWProgram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ static void load_file_contents(std::string const& name, std::vector<char>& conte

in.seekg(0, std::ios::beg);

contents.resize(static_cast<unsigned>(fileSize));
contents.resize(static_cast<unsigned>(fileSize) + 1);

in.read(&contents[0], fileSize);
contents[static_cast<unsigned>(fileSize)] = '\0';
}
else
{
Expand Down
5 changes: 4 additions & 1 deletion RadeonRays/include/radeon_rays.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ namespace RadeonRays
// Find any intersection.
// The call is asynchronous. Event pointer mights be nullptrs.
virtual void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const = 0;


virtual void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koeffs, Buffer const* offset_directions, Buffer const* offset_koeffs, int numorigins, int numdirections, Buffer* hitresults, Event const* waitevent, Event** event) const = 0;

// Find closest intersection, number of rays is in remote memory
// The call is asynchronous. Event pointers might be nullptrs.
Expand Down Expand Up @@ -313,4 +316,4 @@ namespace RadeonRays
}


#endif // RADEON_RAYS_H
#endif // RADEON_RAYS_H
5 changes: 5 additions & 0 deletions RadeonRays/src/api/radeon_rays_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ namespace RadeonRays
{
m_device->QueryOcclusion(rays, numrays, hitresults, waitevent, event);
}

void IntersectionApiImpl::QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koeffs, Buffer const* offset_directions, Buffer const* offset_koeffs, int numorigins, int numdirections, Buffer* hitresults, Event const* waitevent, Event** event) const
{
m_device->QueryOccluded2dSumLinear2(origins, directions, koeffs, offset_directions, offset_koeffs, numorigins, numdirections, hitresults, waitevent, event);
}

void IntersectionApiImpl::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const
{
Expand Down
2 changes: 2 additions & 0 deletions RadeonRays/src/api/radeon_rays_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ namespace RadeonRays
// Find any intersection.
// The call is asynchronous. Event pointer mights be nullptrs.
void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const override;

void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koeffs, Buffer const* offset_directions, Buffer const* offset_koeffs, int numorigins, int numdirections, Buffer* hitresults, Event const* waitevent, Event** event) const override;

// Find closest intersection, number of rays is in remote memory
// TODO: do we need to modify rays' intersection range?
Expand Down
32 changes: 32 additions & 0 deletions RadeonRays/src/device/calc_intersection_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,38 @@ namespace RadeonRays
m_intersector->QueryOcclusion(0, ray_buffer, numrays, hit_buffer, e, nullptr);
}
}


void CalcIntersectionDevice::QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koefs, Buffer const* offset_directions, Buffer const* offset_koefs, int numorigins, int numdirections, Buffer* hits, Event const* waitevent, Event** event) const
{
// Extract Calc buffers from their holders
auto origins_buffer = static_cast<CalcBufferHolder const*>(origins)->m_buffer.get();
auto directions_buffer = static_cast<CalcBufferHolder const*>(directions)->m_buffer.get();
auto koefs_buffer = static_cast<CalcBufferHolder const*>(koefs)->m_buffer.get();

auto offset_directions_buffer = static_cast<CalcBufferHolder const*>(offset_directions)->m_buffer.get();
auto offset_koefs_buffer = static_cast<CalcBufferHolder const*>(offset_koefs)->m_buffer.get();

auto hit_buffer = static_cast<CalcBufferHolder const*>(hits)->m_buffer.get();
// If waitevent is passed in we have to extract it as well
auto e = waitevent ? static_cast<CalcEventHolder const*>(waitevent)->m_event.get() : nullptr;

if (event)
{
// event pointer has been provided, so construct holder and return event to the user
Calc::Event* calc_event = nullptr;
m_intersector->QueryOccluded2dSumLinear2(0, origins_buffer, directions_buffer, koefs_buffer, offset_directions_buffer, offset_koefs_buffer, numorigins, numdirections, hit_buffer, e, &calc_event);

auto holder = CreateEventHolder();
holder->Set(m_device.get(), calc_event);
*event = holder;
}
else
{
m_intersector->QueryOccluded2dSumLinear2(0, origins_buffer, directions_buffer, koefs_buffer, offset_directions_buffer, offset_koefs_buffer, numorigins, numdirections, hit_buffer, e, nullptr);
}
}


void CalcIntersectionDevice::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const
{
Expand Down
1 change: 1 addition & 0 deletions RadeonRays/src/device/calc_intersection_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ namespace RadeonRays
void QueryIntersection(Buffer const* rays, int numrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override;

void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const override;
void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koefs, Buffer const* offset_directions, Buffer const* offset_koefs, int numorigins, int numdirections, Buffer* hits, Event const* waitevent, Event** event) const override;

void QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override;

Expand Down
2 changes: 2 additions & 0 deletions RadeonRays/src/device/intersection_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ namespace RadeonRays
// The call waits until waitevent is resolved (on a target device) if waitevent != nullptr.
// The call is non-blocking if event is passed it, otherwise (event == nullptr) it is blocking.
virtual void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hits, Event const* waitevent, Event** event) const = 0;

virtual void QueryOccluded2dSumLinear2(Buffer const* origins, Buffer const* directions, Buffer const* koefs, Buffer const* offset_directions, Buffer const* offset_koefs, int numorigins, int numdirections, Buffer* hits, Event const* waitevent, Event** event) const = 0;

// Find intersection for the rays in rays buffer and write them into hits buffer. Take the number of rays from the buffer in remote memory.
// rays is assumed AOS with elements of type RadeonRays::ray.
Expand Down
17 changes: 16 additions & 1 deletion RadeonRays/src/intersector/intersector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ namespace RadeonRays
Intersector::Intersector(Calc::Device *device)
: m_device(device),
m_counter(device->CreateBuffer(sizeof(int), Calc::BufferType::kRead),
[device](Calc::Buffer* buffer) { device->DeleteBuffer(buffer); })
[device](Calc::Buffer* buffer) { device->DeleteBuffer(buffer); }),
m_counter2(device->CreateBuffer(sizeof(int), Calc::BufferType::kRead),
[device](Calc::Buffer* buffer) { device->DeleteBuffer(buffer); })

{
}

Expand Down Expand Up @@ -40,6 +43,18 @@ namespace RadeonRays
m_device->Finish(0);
Occluded(queue_idx, rays, m_counter.get(), num_rays, hits, wait_event, event);
}

void Intersector::QueryOccluded2dSumLinear2(std::uint32_t queue_idx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, Calc::Buffer const *offset_directions,
Calc::Buffer const *offset_koefs, std::uint32_t num_origins, std::uint32_t num_directions,
Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const
{
m_device->WriteBuffer(m_counter.get(), 0, 0, sizeof(num_origins), &num_origins, nullptr);
m_device->WriteBuffer(m_counter2.get(), 0, 0, sizeof(num_directions), &num_directions, nullptr);
m_device->Finish(0);

int num_rays = num_origins * num_directions;
Occluded2dSumLinear2(queue_idx, origins, directions, koefs, offset_directions, offset_koefs, m_counter.get(), m_counter2.get(), num_rays, hits, wait_event, event);
}

void Intersector::QueryIntersection(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays,
std::uint32_t max_rays, Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const
Expand Down
11 changes: 11 additions & 0 deletions RadeonRays/src/intersector/intersector.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ namespace RadeonRays
*/
void QueryOcclusion(std::uint32_t queue_idx, Calc::Buffer const* rays, std::uint32_t num_rays,
Calc::Buffer* hits, Calc::Event const* wait_event, Calc::Event** event) const;

void QueryOccluded2dSumLinear2(std::uint32_t queue_idx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs, Calc::Buffer const *offset_directions,
Calc::Buffer const *offset_koefs, std::uint32_t num_origins, std::uint32_t num_directions,
Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const;

/**
\brief Query intersection for a batch of rays
Expand Down Expand Up @@ -152,12 +156,19 @@ namespace RadeonRays
virtual void Occluded(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays,
std::uint32_t max_rays, Calc::Buffer *hits,
Calc::Event const *wait_event, Calc::Event **event) const = 0;

virtual void Occluded2dSumLinear2(std::uint32_t queueidx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs,
Calc::Buffer const *offset_directions, Calc::Buffer const *offset_koefs,
Calc::Buffer const *num_origins, Calc::Buffer const *num_directions,
std::uint32_t maxrays, Calc::Buffer *hits,
Calc::Event const *wait_event, Calc::Event **event) const {}

protected:
// Device to use
Calc::Device* m_device;
// Buffer holding ray count
std::unique_ptr<Calc::Buffer, std::function<void(Calc::Buffer*)>> m_counter;
std::unique_ptr<Calc::Buffer, std::function<void(Calc::Buffer*)>> m_counter2;
};
}

Expand Down
31 changes: 31 additions & 0 deletions RadeonRays/src/intersector/intersector_skip_links.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ namespace RadeonRays
Calc::Executable* executable;
Calc::Function* isect_func;
Calc::Function* occlude_func;
Calc::Function* occlude_func2d_sum_linear;

GpuData(Calc::Device* d)
: device(d)
Expand All @@ -71,6 +72,7 @@ namespace RadeonRays
{
executable->DeleteFunction(isect_func);
executable->DeleteFunction(occlude_func);
executable->DeleteFunction(occlude_func2d_sum_linear);
device->DeleteExecutable(executable);
}
}
Expand Down Expand Up @@ -128,6 +130,7 @@ namespace RadeonRays

m_gpudata->isect_func = m_gpudata->executable->CreateFunction("intersect_main");
m_gpudata->occlude_func = m_gpudata->executable->CreateFunction("occluded_main");
m_gpudata->occlude_func2d_sum_linear = m_gpudata->executable->CreateFunction("occluded_main_2d_sum_linear");
}

void IntersectorSkipLinks::Process(World const& world)
Expand Down Expand Up @@ -446,5 +449,33 @@ namespace RadeonRays

m_device->Execute(func, queueidx, globalsize, localsize, event);
}

void IntersectorSkipLinks::Occluded2dSumLinear2(std::uint32_t queueidx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs,
Calc::Buffer const *offset_directions, Calc::Buffer const *offset_koefs,
Calc::Buffer const *num_origins, Calc::Buffer const *num_directions,
std::uint32_t maxrays, Calc::Buffer *hits,
Calc::Event const *wait_event, Calc::Event **event) const {
auto& func = m_gpudata->occlude_func2d_sum_linear;

// Set args
int arg = 0;

func->SetArg(arg++, m_gpudata->bvh);
func->SetArg(arg++, m_gpudata->vertices);
func->SetArg(arg++, m_gpudata->faces);
func->SetArg(arg++, origins);
func->SetArg(arg++, directions);
func->SetArg(arg++, koefs);
func->SetArg(arg++, offset_directions);
func->SetArg(arg++, offset_koefs);
func->SetArg(arg++, num_origins);
func->SetArg(arg++, num_directions);
func->SetArg(arg++, hits);

size_t localsize = kWorkGroupSize;
size_t globalsize = ((maxrays + kWorkGroupSize - 1) / kWorkGroupSize) * kWorkGroupSize;

m_device->Execute(func, queueidx, globalsize, localsize, event);
}

}
7 changes: 7 additions & 0 deletions RadeonRays/src/intersector/intersector_skip_links.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ namespace RadeonRays
void Occluded(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays,
std::uint32_t max_rays, Calc::Buffer *hits,
Calc::Event const *wait_event, Calc::Event **event) const override;

// Occulusion2d implementation
void Occluded2dSumLinear2(std::uint32_t queueidx, Calc::Buffer const *origins, Calc::Buffer const *directions, Calc::Buffer const *koefs,
Calc::Buffer const *offset_directions, Calc::Buffer const *offset_koefs,
Calc::Buffer const *num_origins, Calc::Buffer const *num_directions,
std::uint32_t maxrays, Calc::Buffer *hits,
Calc::Event const *wait_event, Calc::Event **event) const override;

private:
struct GpuData;
Expand Down
116 changes: 115 additions & 1 deletion RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl
Original file line number Diff line number Diff line change
Expand Up @@ -288,4 +288,118 @@ void occluded_main(
hits[global_id] = MISS_MARKER;
}
}
}
}


__attribute__((reqd_work_group_size(64, 1, 1)))
KERNEL
void occluded_main_2d_sum_linear(
// BVH nodes
GLOBAL bvh_node const* restrict nodes,
// Triangle vertices
GLOBAL float3 const* restrict vertices,
// Triangle indices
GLOBAL Face const* restrict faces,

// Rays
GLOBAL float4 const* restrict origins,
GLOBAL float4 const* restrict directions,
GLOBAL float4 const* restrict koefs,

GLOBAL int const* restrict offset_directions,
GLOBAL int const* restrict offset_koefs,

// Number of origins and directions
GLOBAL int const* restrict num_origins,
GLOBAL int const* restrict num_directions,
// Hit data
GLOBAL float4* hits
)
{
int num_rays = (*num_origins) * (*num_directions);

int global_id = get_global_id(0);

int origin_id = global_id % (*num_origins);
int direction_id = (int)(global_id / (*num_origins));

// Handle only working subset
if (global_id < num_rays)
{
const int direction_offset = offset_directions[origin_id];
const int koefs_offset = offset_koefs[origin_id];

const float4 koef = koefs[direction_id + koefs_offset];

// Create ray
ray r;
r.o = origins[origin_id];
r.d = directions[direction_id + direction_offset];
r.extra.x = -1;
r.extra.y = 1;
r.doBackfaceCulling = 0;
r.padding = 1;

{
// Precompute inverse direction and origin / dir for bbox testing
float3 const invdir = safe_invdir(r);
float3 const oxinvdir = -r.o.xyz * invdir;
// Intersection parametric distance
float t_max = r.o.w;

// Current node address
int addr = 0;

while (addr != INVALID_IDX)
{
// Fetch next node
bvh_node node = nodes[addr];
// Intersect against bbox
float2 s = fast_intersect_bbox1(node, invdir, oxinvdir, t_max);

if (s.x <= s.y)
{
// Check if the node is a leaf
if (LEAFNODE(node))
{
int const face_idx = STARTIDX(node);
Face const face = faces[face_idx];
#ifdef RR_RAY_MASK
if (ray_get_mask(&r) != face.shape_id)
{
#endif // RR_RAY_MASK
float3 const v1 = vertices[face.idx[0]];
float3 const v2 = vertices[face.idx[1]];
float3 const v3 = vertices[face.idx[2]];

// Intersect triangle
float const f = fast_intersect_triangle(r, v1, v2, v3, t_max);
// If hit store the result and bail out
if (f < t_max)
{
hits[origin_id].x += koef.x;
hits[origin_id].y += koef.z;
return;
}
#ifdef RR_RAY_MASK
}
#endif // RR_RAY_MASK
}
else
{
// Move to next node otherwise.
// Left child is always at addr + 1
++addr;
continue;
}
}

addr = NEXT(node);
}

// Finished traversal, but no intersection found
hits[origin_id].x += koef.y;
hits[origin_id].y += koef.w;
}
}
}