Skip to content

Commit 6b4a37e

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (intel#11)
2 parents 8fdbbc4 + 3a4dd88 commit 6b4a37e

32 files changed

+818
-73
lines changed

buildbot/configure.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,14 @@ def do_configure(args):
2626
"-DLLVM_EXTERNAL_PROJECTS=sycl;llvm-spirv",
2727
"-DLLVM_EXTERNAL_SYCL_SOURCE_DIR={}".format(sycl_dir),
2828
"-DLLVM_EXTERNAL_LLVM_SPIRV_SOURCE_DIR={}".format(spirv_dir),
29-
"-DLLVM_ENABLE_PROJECTS=clang;llvm-spirv;sycl",
29+
"-DLLVM_ENABLE_PROJECTS=clang;sycl;llvm-spirv",
3030
"-DOpenCL_INCLUDE_DIR={}".format(ocl_header_dir),
3131
"-DOpenCL_LIBRARY={}".format(icd_loader_lib),
3232
"-DLLVM_BUILD_TOOLS=OFF",
3333
"-DSYCL_ENABLE_WERROR=ON",
3434
"-DLLVM_ENABLE_ASSERTIONS=ON",
3535
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
36+
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
3637
llvm_dir]
3738

3839
print(cmake_cmd)

clang/lib/CodeGen/CodeGenFunction.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,11 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
540540
if (!FD->hasAttr<OpenCLKernelAttr>())
541541
return;
542542

543+
// TODO Module identifier is not reliable for this purpose since two modules
544+
// can have the same ID, needs improvement
545+
if (getLangOpts().SYCLIsDevice)
546+
Fn->addFnAttr("sycl-module-id", Fn->getParent()->getModuleIdentifier());
547+
543548
llvm::LLVMContext &Context = getLLVMContext();
544549

545550
CGM.GenOpenCLArgMetadata(Fn, FD, this);

clang/lib/Driver/ToolChains/MSVC.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ void visualstudio::Linker::constructMSVCLibCommand(Compilation &C,
324324
CmdArgs.push_back(
325325
C.getArgs().MakeArgString(Twine("-OUT:") + Output.getFilename()));
326326

327-
SmallString<128> ExecPath(getToolChain().GetProgramPath("lib"));
327+
SmallString<128> ExecPath(getToolChain().GetProgramPath("lib.exe"));
328328
const char *Exec = C.getArgs().MakeArgString(ExecPath);
329329
C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, None));
330330
}

clang/test/CodeGenSYCL/module-id.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -std=c++11 -fsycl-is-device -disable-llvm-passes -S -emit-llvm -x c++ %s -o - | FileCheck %s
2+
3+
template <typename name, typename Func>
4+
__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
5+
kernelFunc();
6+
}
7+
8+
int main() {
9+
kernel_single_task<class kernel>([]() {});
10+
return 0;
11+
}
12+
// CHECK: define spir_kernel void @{{.*}}kernel{{.*}}() #[[KERN_ATTR:[0-9]+]]
13+
14+
// CHECK: #[[KERN_ATTR]] = { {{.*}}"sycl-module-id"="{{.*}}module-id.cpp"{{.*}} }

clang/test/Driver/sycl-offload-intelfpga.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,17 @@
3030
// CHK-FPGA-IMAGE: aoc{{.*}} "-o" "[[OUTPUT5:.+\.aocx]]" "[[OUTPUT4]]" "-sycl"
3131
// CHK-FPGA-LINK: {{lib|llvm-ar}}{{.*}}
3232

33+
/// -fintelfpga -fsycl-link clang-cl specific
34+
// RUN: touch %t.obj
35+
// RUN: %clang_cl -### -clang:-target -clang:x86_64-pc-windows-msvc -fsycl -fintelfpga -fsycl-link %t.obj 2>&1 \
36+
// RUN: | FileCheck -check-prefixes=CHK-FPGA-LINK-WIN %s
37+
// CHK-FPGA-LINK-WIN: clang-offload-bundler{{.*}} "-type=o" "-targets=host-x86_64-pc-windows-msvc,sycl-spir64_fpga-unknown-{{.*}}-sycldevice{{.*}}" "-inputs=[[INPUT:.+\.obj]]" "-outputs=[[OUTPUT1:.+\.obj]],[[OUTPUT2:.+\.obj]]" "-unbundle"
38+
// CHK-FPGA-LINK-WIN: llvm-link{{.*}} "[[OUTPUT2]]" "-o" "[[OUTPUT3:.+\.bc]]"
39+
// CHK-FPGA-LINK-WIN: llvm-spirv{{.*}} "-spirv-max-version=1.1" "-spirv-ext=+all" "-o" "[[OUTPUT4:.+\.spv]]" "[[OUTPUT3]]"
40+
// CHK-FPGA-LINK-WIN: aoc{{.*}} "-o" "[[OUTPUT5:.+\.aocr]]" "[[OUTPUT4]]" "-sycl" "-rtl"
41+
// CHK-FPGA-LINK-WIN: lib.exe{{.*}}
42+
43+
3344
/// Check -fintelfpga -fsycl-link with an FPGA archive
3445
// Create the dummy archive
3546
// RUN: echo "Dummy AOCR image" > %t.aocr

sycl/CMakeLists.txt

+17-3
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,23 @@ add_custom_target( sycl-toolchain
159159
COMMENT "Building SYCL compiler toolchain..."
160160
)
161161

162-
add_subdirectory( test )
163-
add_subdirectory( unittests )
164-
add_subdirectory( tools )
162+
if (NOT DEFINED LLVM_INCLUDE_TESTS)
163+
set(LLVM_INCLUDE_TESTS ON)
164+
endif()
165+
166+
option(SYCL_INCLUDE_TESTS
167+
"Generate build targets for the SYCL unit tests."
168+
${LLVM_INCLUDE_TESTS})
169+
170+
add_subdirectory(tools)
171+
172+
if(SYCL_INCLUDE_TESTS)
173+
if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include/gtest/gtest.h)
174+
add_subdirectory(unittests)
175+
list(APPEND SYCL_TEST_DEPS SYCLUnitTests)
176+
endif()
177+
add_subdirectory(test)
178+
endif()
165179

166180
# Package deploy support
167181
# Listed here are component names contributing the package

sycl/doc/extensions/USM/USM.adoc

+3-3
Original file line numberDiff line numberDiff line change
@@ -528,9 +528,9 @@ float* a = static_cast<float*>(malloc_shared(10*sizeof(float), dev, ctxt));
528528
float* b = static_cast<float*>(malloc_shared(10*sizeof(float), dev, ctxt));
529529
float* c = static_cast<float*>(malloc_shared(10*sizeof(float), dev, ctxt));
530530
531-
queue Q;
532-
auto e = Q.submit([&](handler& cgh) {
533-
cgh.parallel_for<class vec_add>(range<1> {10}, [=](id<1> i) {
531+
auto e = q.submit([&](handler& cgh) {
532+
cgh.parallel_for<class vec_add>(range<1> {10}, [=](id<1> ID) {
533+
size_t i = ID[0];
534534
c[i] = a[i] + b[i];
535535
});
536536
});

sycl/include/CL/sycl/detail/pi.h

+21-2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ typedef enum {
5656
PI_INVALID_CONTEXT = CL_INVALID_CONTEXT,
5757
PI_INVALID_PLATFORM = CL_INVALID_PLATFORM,
5858
PI_INVALID_DEVICE = CL_INVALID_DEVICE,
59+
PI_INVALID_BINARY = CL_INVALID_BINARY,
5960
PI_MISALIGNED_SUB_BUFFER_OFFSET = CL_MISALIGNED_SUB_BUFFER_OFFSET,
6061
PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY
6162
} _pi_result;
@@ -245,9 +246,20 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4;
245246
/// Target identification strings for
246247
/// pi_device_binary_struct.DeviceTargetSpec
247248
///
249+
/// A device type represented by a particular target
250+
/// triple requires specific binary images. We need
251+
/// to map the image type onto the device target triple
252+
///
248253
#define PI_DEVICE_BINARY_TARGET_UNKNOWN "<unknown>"
254+
/// SPIR-V 32-bit image <-> 32-bit OpenCL device
249255
#define PI_DEVICE_BINARY_TARGET_SPIRV32 "spir"
250-
#define PI_DEVICE_BINARY_TARGET_SPIRV64 "spir64";
256+
/// SPIR-V 64-bit image <-> 64-bit OpenCL device
257+
#define PI_DEVICE_BINARY_TARGET_SPIRV64 "spir64"
258+
/// Device-specific binary images produced from SPIR-V 64-bit <->
259+
/// various triples for specific 64-bit OpenCL devices
260+
#define PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64 "spir64_x86_64"
261+
#define PI_DEVICE_BINARY_TARGET_SPIRV64_GEN "spir64_gen"
262+
#define PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA "spir64_fpga"
251263

252264
/// This struct is a record of the device binary information. If the Kind field
253265
/// denotes a portable binary type (SPIRV or LLVMIR), the DeviceTargetSpec field
@@ -264,6 +276,13 @@ struct pi_device_binary_struct {
264276
/// format of the binary data - SPIRV, LLVMIR bitcode,...
265277
uint8_t Format;
266278
/// null-terminated string representation of the device's target architecture
279+
/// which holds one of:
280+
/// PI_DEVICE_BINARY_TARGET_UNKNOWN - unknown
281+
/// PI_DEVICE_BINARY_TARGET_SPIRV32 - general value for 32-bit OpenCL devices
282+
/// PI_DEVICE_BINARY_TARGET_SPIRV64 - general value for 64-bit OpenCL devices
283+
/// PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64 - 64-bit OpenCL CPU device
284+
/// PI_DEVICE_BINARY_TARGET_SPIRV64_GEN - GEN GPU device (64-bit OpenCL)
285+
/// PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA - 64-bit OpenCL FPGA device
267286
const char *DeviceTargetSpec;
268287
/// a null-terminated string; target- and compiler-specific options
269288
/// which are suggested to use to "build" program at runtime
@@ -405,7 +424,7 @@ pi_result piDevicePartition(
405424
/// and the IR characteristics.
406425
///
407426
pi_result piextDeviceSelectBinary(
408-
pi_device device, // TODO: does this need to be context?
427+
pi_device device,
409428
pi_device_binary * binaries,
410429
pi_uint32 num_binaries,
411430
pi_device_binary * selected_binary);

sycl/include/CL/sycl/detail/scheduler/commands.hpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ class Command {
6666
RELEASE,
6767
MAP_MEM_OBJ,
6868
UNMAP_MEM_OBJ,
69-
UPDATE_REQUIREMENT
69+
UPDATE_REQUIREMENT,
70+
EMPTY_TASK
7071
};
7172

7273
Command(CommandType Type, QueueImplPtr Queue, bool UseExclusiveQueue = false);
@@ -123,6 +124,23 @@ class Command {
123124
std::atomic<bool> MEnqueued;
124125
};
125126

127+
// The command does nothing during enqueue. The task can be used to implement
128+
// lock in the graph, or to merge several nodes into one.
129+
class EmptyCommand : public Command {
130+
public:
131+
EmptyCommand(QueueImplPtr Queue, Requirement *Req)
132+
: Command(CommandType::EMPTY_TASK, std::move(Queue)),
133+
MStoredRequirement(*Req) {}
134+
135+
Requirement *getStoredRequirement() { return &MStoredRequirement; }
136+
137+
private:
138+
cl_int enqueueImp() override { return CL_SUCCESS; }
139+
void printDot(std::ostream &Stream) const override;
140+
141+
Requirement MStoredRequirement;
142+
};
143+
126144
// The command enqueues release instance of memory allocated on Host or
127145
// underlying framework.
128146
class ReleaseCommand : public Command {

sycl/include/CL/sycl/detail/usm_impl.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ namespace usm {
1919
void *alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt,
2020
const device &Dev, cl::sycl::usm::alloc Kind);
2121

22-
void *alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt,
23-
cl::sycl::usm::alloc Kind);
22+
void *alignedAllocHost(size_t Alignment, size_t Bytes, const context &Ctxt,
23+
cl::sycl::usm::alloc Kind);
2424

2525
void free(void *Ptr, const context &Ctxt);
2626

sycl/include/CL/sycl/usm/usm_allocator.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ class usm_allocator {
109109
usm::alloc AllocT = AllocKind,
110110
typename std::enable_if<AllocT == usm::alloc::host, int>::type = 0>
111111
pointer allocate(size_t Size) {
112-
auto Result = reinterpret_cast<pointer>(detail::usm::alignedAlloc(
112+
auto Result = reinterpret_cast<pointer>(detail::usm::alignedAllocHost(
113113
getAlignment(), Size * sizeof(value_type), mContext, AllocKind));
114114
if (!Result) {
115115
throw memory_allocation_error();

sycl/source/detail/pi_opencl.cpp

+61-10
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,16 @@ pi_result OCL(piDevicesGet)(pi_platform platform,
5959
return cast<pi_result>(result);
6060
}
6161

62-
pi_result OCL(piextDeviceSelectBinary)(
63-
pi_device device, // TODO: does this need to be context?
64-
pi_device_binary * images,
65-
pi_uint32 num_images,
66-
pi_device_binary * selected_image) {
67-
68-
// TODO dummy implementation.
62+
pi_result OCL(piextDeviceSelectBinary)(pi_device device,
63+
pi_device_binary *images,
64+
pi_uint32 num_images,
65+
pi_device_binary *selected_image) {
66+
67+
// TODO: this is a bare-bones implementation for choosing a device image
68+
// that would be compatible with the targeted device. An AOT-compiled
69+
// image is preferred over SPIRV for known devices (i.e. Intel devices)
70+
// The implementation makes no effort to differentiate between multiple images
71+
// for the given device, and simply picks the first one compatible
6972
// Real implementaion will use the same mechanism OpenCL ICD dispatcher
7073
// uses. Somthing like:
7174
// PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_INVALID_CONTEXT);
@@ -74,8 +77,56 @@ pi_result OCL(piextDeviceSelectBinary)(
7477
// where context->dispatch is set to the dispatch table provided by PI
7578
// plugin for platform/device the ctx was created for.
7679

77-
*selected_image = num_images > 0 ? images[0] : nullptr;
78-
return PI_SUCCESS;
80+
// Choose the binary target for the provided device
81+
const char *image_target = nullptr;
82+
// Get the type of the device
83+
cl_device_type device_type;
84+
cl_int ret_err = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
85+
sizeof(cl_device_type), &device_type, nullptr);
86+
if (ret_err != CL_SUCCESS) {
87+
*selected_image = nullptr;
88+
return cast<pi_result>(ret_err);
89+
}
90+
91+
switch (device_type) {
92+
// TODO: Factor out vendor specifics into a separate source
93+
// E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp?
94+
95+
// We'll attempt to find an image that was AOT-compiled
96+
// from a SPIR-V image into an image specific for:
97+
98+
case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU
99+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64;
100+
break;
101+
case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU
102+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64_GEN;
103+
break;
104+
case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA
105+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA;
106+
break;
107+
default:
108+
// Otherwise, we'll attempt to find and JIT-compile
109+
// a device-independent SPIR-V image
110+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64;
111+
break;
112+
}
113+
114+
// Find the appropriate device image, fallback to spirv if not found
115+
pi_device_binary fallback = nullptr;
116+
for (size_t i = 0; i < num_images; ++i) {
117+
if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
118+
*selected_image = images[i];
119+
return PI_SUCCESS;
120+
}
121+
if (strcmp(images[i]->DeviceTargetSpec, PI_DEVICE_BINARY_TARGET_SPIRV64) ==
122+
0)
123+
fallback = images[i];
124+
}
125+
// Points to a spirv image, if such indeed was found
126+
if ((*selected_image = fallback))
127+
return PI_SUCCESS;
128+
// No image can be loaded for the given device
129+
return PI_INVALID_BINARY;
79130
}
80131

81132
pi_result OCL(piQueueCreate)(pi_context context, pi_device device,
@@ -290,7 +341,7 @@ _PI_CL(piDeviceRetain, clRetainDevice)
290341
_PI_CL(piDeviceRelease, clReleaseDevice)
291342
_PI_CL(piextDeviceSelectBinary, OCL(piextDeviceSelectBinary))
292343
_PI_CL(piextGetDeviceFunctionPointer, OCL(piextGetDeviceFunctionPointer))
293-
// Context
344+
// Context
294345
_PI_CL(piContextCreate, clCreateContext)
295346
_PI_CL(piContextGetInfo, clGetContextInfo)
296347
_PI_CL(piContextRetain, clRetainContext)

sycl/source/detail/program_manager/program_manager.cpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
263263
<< getRawSyclObjImpl(Context) << ")\n";
264264
}
265265

266+
const RT::PiContext &Ctx = getRawSyclObjImpl(Context)->getHandleRef();
266267
DeviceImage *Img = nullptr;
267268
bool UseKernelSpv = false;
268269
const std::string UseSpvEnv("SYCL_USE_KERNEL_SPV");
@@ -309,6 +310,10 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
309310
std::cerr << "loaded device image from " << Fname << "\n";
310311
}
311312
} else {
313+
// TODO: There may be cases with cl::sycl::program class usage in source code
314+
// that will result in a multi-device context. This case needs to be handled
315+
// here or at the program_impl class level
316+
312317
// Take all device images in module M and ask the native runtime under the
313318
// given context to choose one it prefers.
314319
auto ImgIt = m_DeviceImages.find(M);
@@ -318,8 +323,8 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
318323
}
319324
std::vector<DeviceImage *> *Imgs = (ImgIt->second).get();
320325

321-
PI_CALL(RT::piextDeviceSelectBinary(
322-
0, Imgs->data(), (cl_uint)Imgs->size(), &Img));
326+
PI_CALL(RT::piextDeviceSelectBinary(getFirstDevice(Ctx), Imgs->data(),
327+
(cl_uint)Imgs->size(), &Img));
323328

324329
if (DbgProgMgr > 0) {
325330
std::cerr << "available device images:\n";
@@ -400,7 +405,6 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
400405
// Load the selected image
401406
if (!is_device_binary_type_supported(Context, Format))
402407
throw feature_not_supported("Online compilation is not supported in this context");
403-
const RT::PiContext &Ctx = getRawSyclObjImpl(Context)->getHandleRef();
404408
RT::PiProgram Res = nullptr;
405409
Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV
406410
? createSpirvProgram(Ctx, Img->BinaryStart, ImgSize)

sycl/source/detail/scheduler/commands.cpp

+18-3
Original file line numberDiff line numberDiff line change
@@ -424,9 +424,7 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const {
424424
for (const auto &Dep : MDeps) {
425425
Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\""
426426
<< " [ label = \"Access mode: "
427-
<< accessModeToString(
428-
Dep.MAllocaCmd->getAllocationReq()->MAccessMode)
429-
<< "\\n"
427+
<< accessModeToString(Dep.MReq->MAccessMode) << "\\n"
430428
<< "MemObj: " << Dep.MAllocaCmd->getSYCLMemObj() << " \" ]"
431429
<< std::endl;
432430
}
@@ -467,6 +465,23 @@ cl_int MemCpyCommandHost::enqueueImp() {
467465
return CL_SUCCESS;
468466
}
469467

468+
void EmptyCommand::printDot(std::ostream &Stream) const {
469+
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#8d8f29\", label=\"";
470+
471+
Stream << "ID = " << this << "\n";
472+
Stream << "EMPTY NODE"
473+
<< "\\n";
474+
475+
Stream << "\"];" << std::endl;
476+
477+
for (const auto &Dep : MDeps) {
478+
Stream << " \"" << this << "\" -> \"" << Dep.MDepCommand << "\""
479+
<< " [ label = \"Access mode: "
480+
<< accessModeToString(Dep.MReq->MAccessMode) << "\\n"
481+
<< "MemObj: " << Dep.MReq->MSYCLMemObj << " \" ]" << std::endl;
482+
}
483+
}
484+
470485
void MemCpyCommandHost::printDot(std::ostream &Stream) const {
471486
Stream << "\"" << this << "\" [style=filled, fillcolor=\"#B6A2EB\", label=\"";
472487

sycl/source/detail/scheduler/graph_builder.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,8 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
167167
UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd});
168168
Dep->addUser(UpdateCommand);
169169
}
170-
// access::mode::read_write is always used here regardless of requieremnt
171-
// access mode because this node shouldn't be skipped.
172-
UpdateLeafs(Deps, Record, access::mode::read_write);
173-
AddNodeToLeafs(Record, UpdateCommand, access::mode::read_write);
170+
UpdateLeafs(Deps, Record, Req->MAccessMode);
171+
AddNodeToLeafs(Record, UpdateCommand, Req->MAccessMode);
174172
return UpdateCommand;
175173
}
176174

0 commit comments

Comments
 (0)