Skip to content

[SYCL][PI][CUDA] Implements select kernel and kernel-group queries #1180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,17 @@ typedef enum {
PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS,
PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT,
PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT,
PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM
PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM,
PI_KERNEL_INFO_ATTRIBUTES = CL_KERNEL_ATTRIBUTES
} _pi_kernel_info;

typedef enum {
PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE,
PI_KERNEL_COMPILE_GROUP_INFO_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE
PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE,
PI_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE =
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
PI_KERNEL_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
} _pi_kernel_group_info;

typedef enum {
Expand Down
21 changes: 21 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2107,6 +2107,9 @@ pi_result cuda_piKernelGetInfo(
return getInfo(param_value_size, param_value, param_value_size_ret,
kernel->get_program());
}
case PI_KERNEL_INFO_ATTRIBUTES: {
return getInfo(param_value_size, param_value, param_value_size_ret, "");
}
default: {
PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down Expand Up @@ -2154,6 +2157,24 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_uint64(bytes));
}
case PI_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
// Work groups should be multiples of the warp size
int warpSize = 0;
cl::sycl::detail::pi::assertion(
cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE,
device->get()) == CUDA_SUCCESS);
return getInfo(param_value_size, param_value, param_value_size_ret,
static_cast<size_t>(warpSize));
}
case PI_KERNEL_PRIVATE_MEM_SIZE: {
// OpenCL PRIVATE == CUDA LOCAL
int bytes = 0;
cl::sycl::detail::pi::assertion(
cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
kernel->get()) == CUDA_SUCCESS);
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_uint64(bytes));
}
default:
PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
Expand Down
66 changes: 66 additions & 0 deletions sycl/test/basic_tests/kernel_info.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

//==--- kernel_info.cpp - SYCL kernel info test ----------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>

#include <cstdlib>
#include <iostream>

using namespace cl::sycl;

/// Runtime assertion helper for this test: on a failed condition, prints the
/// stringized condition with its source location to stderr and aborts the
/// process. Used via the CHECK macro so failures report file and line.
///
/// @param condition        result of the checked expression
/// @param conditionString  stringized source text of the expression
/// @param filename         source file of the check site (__FILE__)
/// @param line             source line of the check site (__LINE__)
void check(bool condition, const char *conditionString, const char *filename,
           const long line) noexcept {
  // Fast path: nothing to do when the check holds.
  if (condition)
    return;
  std::cerr << "CHECK failed in " << filename << "#" << line << " "
            << conditionString << "\n";
  // Abort (rather than exit) so test runners detect the failure reliably.
  std::abort();
}

#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__)

int main() {
queue q;

buffer<int, 1> buf(range<1>(1));
program prg(q.get_context());

prg.build_with_kernel_type<class SingleTask>();
kernel krn = prg.get_kernel<class SingleTask>();

q.submit([&](handler &cgh) {
auto acc = buf.get_access<access::mode::read_write>(cgh);
cgh.single_task<class SingleTask>(krn, [=]() { acc[0] = acc[0] + 1; });
});

const string_class krnName = krn.get_info<info::kernel::function_name>();
CHECK(!krnName.empty());
const cl_uint krnArgCount = krn.get_info<info::kernel::num_args>();
CHECK(krnArgCount > 0);
const context krnCtx = krn.get_info<info::kernel::context>();
CHECK(krnCtx == q.get_context());
const program krnPrg = krn.get_info<info::kernel::program>();
CHECK(krnPrg == prg);
const cl_uint krnRefCount = krn.get_info<info::kernel::reference_count>();
CHECK(krnRefCount > 0);
const string_class krnAttr = krn.get_info<info::kernel::attributes>();
CHECK(krnAttr.empty());

device dev = q.get_device();
const size_t wgSize =
krn.get_work_group_info<info::kernel_work_group::work_group_size>(dev);
CHECK(wgSize > 0);
const size_t prefWGSizeMult = krn.get_work_group_info<
info::kernel_work_group::preferred_work_group_size_multiple>(dev);
CHECK(prefWGSizeMult > 0);
const cl_ulong prvMemSize =
krn.get_work_group_info<info::kernel_work_group::private_mem_size>(dev);
CHECK(prvMemSize == 0);
}