Skip to content

Commit bcefeb3

Browse files
authored
[SYCL][CUDA] Select only NVPTX64 device binaries (#1223)
Add the binary target identifier "nvptx64" for NVIDIA PTX devices. Search through the available binary images and select the first one for the PI_DEVICE_BINARY_TARGET_NVPTX64 ("nvptx64") target. Return PI_INVALID_BINARY if no "nvptx64" image is available. Add a LIT test to check that both backends (PI_OPENCL, PI_CUDA) work irrespective of the order of the -fsycl-targets=... arguments. Signed-off-by: Andrea Bocci <[email protected]>
1 parent 72b7dee commit bcefeb3

File tree

3 files changed

+90
-6
lines changed

3 files changed

+90
-6
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,9 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4;
449449
#define PI_DEVICE_BINARY_TARGET_SPIRV64_GEN "spir64_gen"
450450
#define PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA "spir64_fpga"
451451

452+
/// PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device
453+
#define PI_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64"
454+
452455
/// This struct is a record of the device binary information. If the Kind field
453456
/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec
454457
/// field can still be specific and denote e.g. FPGA target. It must match the

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -687,10 +687,10 @@ pi_result cuda_piDevicePartition(
687687
return {};
688688
}
689689

690-
pi_result cuda_piextDeviceSelectBinary(
691-
pi_device device, // TODO: does this need to be context?
692-
pi_device_binary *binaries, pi_uint32 num_binaries,
693-
pi_device_binary *selected_binary) {
690+
pi_result cuda_piextDeviceSelectBinary(pi_device device,
691+
pi_device_binary *binaries,
692+
pi_uint32 num_binaries,
693+
pi_device_binary *selected_binary) {
694694
if (!binaries) {
695695
cl::sycl::detail::pi::die("No list of device images provided");
696696
}
@@ -700,8 +700,19 @@ pi_result cuda_piextDeviceSelectBinary(
700700
if (!selected_binary) {
701701
cl::sycl::detail::pi::die("No storage for device binary provided");
702702
}
703-
*selected_binary = binaries[0];
704-
return PI_SUCCESS;
703+
704+
// Look for an image for the NVPTX64 target, and return the first one that is
705+
// found
706+
for (pi_uint32 i = 0; i < num_binaries; i++) {
707+
if (strcmp(binaries[i]->DeviceTargetSpec,
708+
PI_DEVICE_BINARY_TARGET_NVPTX64) == 0) {
709+
*selected_binary = binaries[i];
710+
return PI_SUCCESS;
711+
}
712+
}
713+
714+
// No image can be loaded for the given device
715+
return PI_INVALID_BINARY;
705716
}
706717

707718
pi_result cuda_piextGetDeviceFunctionPointer(pi_device device,
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice,nvptx64-unknown-unknown-sycldevice %s -o %t-spir64-nvptx64.out
2+
// RUN: env SYCL_BE=PI_OPENCL %t-spir64-nvptx64.out
3+
// RUN: env SYCL_BE=PI_CUDA %t-spir64-nvptx64.out
4+
// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice,spir64-unknown-unknown-sycldevice %s -o %t-nvptx64-spir64.out
5+
// RUN: env SYCL_BE=PI_OPENCL %t-nvptx64-spir64.out
6+
// RUN: env SYCL_BE=PI_CUDA %t-nvptx64-spir64.out
7+
8+
// REQUIRES: opencl, cuda
9+
10+
//==------- sycl-targets-order.cpp - SYCL -fsycl-targets order test --------==//
11+
//
12+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
13+
// See https://llvm.org/LICENSE.txt for license information.
14+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
15+
//
16+
//===----------------------------------------------------------------------===//
17+
18+
#include <CL/sycl.hpp>
19+
#include <iostream>
20+
21+
int main(int argc, char **argv) {
22+
23+
// select the default SYCL device
24+
cl::sycl::device device{cl::sycl::default_selector{}};
25+
std::cout << "Running on SYCL device " << device.get_info<cl::sycl::info::device::name>()
26+
<< ", driver version " << device.get_info<cl::sycl::info::device::driver_version>()
27+
<< std::endl;
28+
29+
// create a queue
30+
cl::sycl::queue queue{device};
31+
32+
// create a buffer of 4 ints to be used inside the kernel code
33+
cl::sycl::buffer<unsigned int, 1> buffer(4);
34+
35+
// size of the index space for the kernel
36+
cl::sycl::range<1> NumOfWorkItems{buffer.get_count()};
37+
38+
// submit a command group(work) to the queue
39+
queue.submit([&](cl::sycl::handler &cgh) {
40+
// get write only access to the buffer on a device
41+
auto accessor = buffer.get_access<cl::sycl::access::mode::write>(cgh);
42+
// executing the kernel
43+
cgh.parallel_for<class FillBuffer>(
44+
NumOfWorkItems, [=](cl::sycl::id<1> WIid) {
45+
// fill the buffer with indexes
46+
accessor[WIid] = WIid.get(0);
47+
});
48+
});
49+
50+
// get read-only access to the buffer on the host
51+
// introduce an implicit barrier waiting for queue to complete the work
52+
const auto host_accessor = buffer.get_access<cl::sycl::access::mode::read>();
53+
54+
// check the results
55+
bool mismatch = false;
56+
for (unsigned int i = 0; i < buffer.get_count(); ++i) {
57+
if (host_accessor[i] != i) {
58+
std::cout << "The result is incorrect for element: " << i
59+
<< " , expected: " << i << " , got: " << host_accessor[i]
60+
<< std::endl;
61+
mismatch = true;
62+
}
63+
}
64+
65+
if (not mismatch) {
66+
std::cout << "The results are correct!" << std::endl;
67+
}
68+
69+
return mismatch;
70+
}

0 commit comments

Comments
 (0)