Skip to content

Commit 39c338d

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (intel#5)
2 parents 18eb83e + a818801 commit 39c338d

File tree

16 files changed

+127
-56
lines changed

16 files changed

+127
-56
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,19 @@ static void addImpliedArgs(const llvm::Triple &Triple,
511511
BeArgs.push_back("-g");
512512
if (Args.getLastArg(options::OPT_O0))
513513
BeArgs.push_back("-cl-opt-disable");
514+
// Check if floating pointing optimizations are allowed.
515+
bool isFastMath = isOptimizationLevelFast(Args);
516+
Arg *A = Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math,
517+
options::OPT_funsafe_math_optimizations,
518+
options::OPT_fno_unsafe_math_optimizations);
519+
isFastMath =
520+
isFastMath || (A && (A->getOption().getID() == options::OPT_ffast_math ||
521+
A->getOption().getID() ==
522+
options::OPT_funsafe_math_optimizations));
523+
A = Args.getLastArg(options::OPT_ffp_model_EQ);
524+
isFastMath = isFastMath || (A && StringRef(A->getValue()).equals("fast"));
525+
if (isFastMath)
526+
BeArgs.push_back("-cl-fast-relaxed-math");
514527
if (BeArgs.empty())
515528
return;
516529
if (Triple.getSubArch() == llvm::Triple::NoSubArch ||

clang/test/Driver/sycl-offload.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,5 +904,13 @@
904904
// CHECK-STD-OVR: clang{{.*}} "-emit-obj" {{.*}} "-std=c++14"
905905
// CHECK-STD-OVR-NOT: clang{{.*}} "-std=c++17"
906906

907+
// Bypass -cl-fast-relaxed-math to SPIR-V compiler.
908+
// RUN: %clang -### -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -ffast-math %s 2>&1 | FileCheck -check-prefix=CHECK-FAST-MATH-OPT %s
909+
// RUN: %clang -### -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -funsafe-math-optimizations %s 2>&1 | FileCheck -check-prefix=CHECK-FAST-MATH-OPT %s
910+
// RUN: %clang -### -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -Ofast %s 2>&1 | FileCheck -check-prefix=CHECK-FAST-MATH-OPT %s
911+
// RUN: %clang -### -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -ffp-model=fast %s 2>&1 | FileCheck -check-prefix=CHECK-FAST-MATH-OPT %s
912+
// RUN: %clang_cl -### -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice /fp:fast %s 2>&1 | FileCheck -check-prefix=CHECK-FAST-MATH-OPT %s
913+
// CHECK-FAST-MATH-OPT: clang-offload-wrapper{{.*}} "-compile-opts=-cl-fast-relaxed-math"
914+
907915
// TODO: SYCL specific fail - analyze and enable
908916
// XFAIL: windows-msvc

sycl/include/CL/sycl/detail/pi.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,12 @@ constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR;
494494
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = CL_MEM_COPY_HOST_PTR;
495495
constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_ALLOC = CL_MEM_ALLOC_HOST_PTR;
496496

497+
// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to
498+
// make the translation to OpenCL transparent.
499+
// TODO: populate
500+
//
501+
using pi_mem_properties = pi_bitfield;
502+
497503
// NOTE: queue properties are implemented this way to better support bit
498504
// manipulations
499505
using pi_queue_properties = pi_bitfield;
@@ -982,9 +988,9 @@ __SYCL_EXPORT pi_result piextQueueCreateWithNativeHandle(
982988
//
983989
// Memory
984990
//
985-
__SYCL_EXPORT pi_result piMemBufferCreate(pi_context context,
986-
pi_mem_flags flags, size_t size,
987-
void *host_ptr, pi_mem *ret_mem);
991+
__SYCL_EXPORT pi_result piMemBufferCreate(
992+
pi_context context, pi_mem_flags flags, size_t size, void *host_ptr,
993+
pi_mem *ret_mem, const pi_mem_properties *properties = nullptr);
988994

989995
__SYCL_EXPORT pi_result piMemImageCreate(pi_context context, pi_mem_flags flags,
990996
const pi_image_format *image_format,

sycl/include/CL/sycl/half_type.hpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,6 @@ template <> struct numeric_limits<cl::sycl::half> {
246246
static constexpr bool has_denorm_loss = false;
247247
static constexpr bool tinyness_before = false;
248248
static constexpr bool traps = false;
249-
static constexpr float half_min = 6.103515625e-05f;
250-
static constexpr float half_max = 65504.0f;
251-
static constexpr float half_eps = 9.765625e-04f;
252249
static constexpr int max_exponent10 = 4;
253250
static constexpr int max_exponent = 16;
254251
static constexpr int min_exponent10 = -4;
@@ -263,24 +260,24 @@ template <> struct numeric_limits<cl::sycl::half> {
263260
static constexpr float_round_style round_style = round_to_nearest;
264261

265262
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half(min)() noexcept {
266-
return half_min;
263+
return 6.103515625e-05f; // half minimum value
267264
}
268265

269266
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half(max)() noexcept {
270-
return half_max;
267+
return 65504.0f; // half maximum value
271268
}
272269

273270
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half lowest() noexcept {
274-
return -half_max;
271+
return -65504.0f; // -1*(half maximum value)
275272
}
276273

277274
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half epsilon() noexcept {
278-
return half_eps;
275+
return 9.765625e-04f; // half epsilon
279276
}
280277

281278
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half
282279
round_error() noexcept {
283-
return 0.5F;
280+
return 0.5f;
284281
}
285282

286283
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half infinity() noexcept {
@@ -297,7 +294,7 @@ template <> struct numeric_limits<cl::sycl::half> {
297294
}
298295

299296
static __SYCL_CONSTEXPR_ON_DEVICE const cl::sycl::half denorm_min() noexcept {
300-
return 5.96046e-08F;
297+
return 5.96046e-08f;
301298
}
302299
};
303300

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1628,9 +1628,11 @@ pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle,
16281628
/// \TODO Implement USE_HOST_PTR using cuHostRegister
16291629
///
16301630
pi_result cuda_piMemBufferCreate(pi_context context, pi_mem_flags flags,
1631-
size_t size, void *host_ptr, pi_mem *ret_mem) {
1631+
size_t size, void *host_ptr, pi_mem *ret_mem,
1632+
const pi_mem_properties *properties) {
16321633
// Need input memory object
16331634
assert(ret_mem != nullptr);
1635+
assert(properties == nullptr && "no mem properties goes to cuda RT yet");
16341636
// Currently, USE_HOST_PTR is not implemented using host register
16351637
// since this triggers a weird segfault after program ends.
16361638
// Setting this constant to true enables testing that behavior.

sycl/plugins/level_zero/pi_level_zero.cpp

100755100644
Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,12 @@ enum {
3636
ZeSerializeBlock =
3737
2, // blocking ZE calls, where supported (usually in enqueue commands)
3838
};
39-
static pi_uint32 ZeSerialize = 0;
39+
static const pi_uint32 ZeSerialize = [] {
40+
const char *SerializeMode = std::getenv("ZE_SERIALIZE");
41+
const pi_uint32 SerializeModeValue =
42+
SerializeMode ? std::atoi(SerializeMode) : 0;
43+
return SerializeModeValue;
44+
}();
4045

4146
// This class encapsulates actions taken along with a call to Level Zero API.
4247
class ZeCall {
@@ -693,11 +698,6 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
693698
ZeValidationLayer = true;
694699
}
695700

696-
static const char *SerializeMode = std::getenv("ZE_SERIALIZE");
697-
static const pi_uint32 SerializeModeValue =
698-
SerializeMode ? std::atoi(SerializeMode) : 0;
699-
ZeSerialize = SerializeModeValue;
700-
701701
if (NumEntries == 0 && Platforms != nullptr) {
702702
return PI_INVALID_VALUE;
703703
}
@@ -1908,12 +1908,15 @@ pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle,
19081908
}
19091909

19101910
pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
1911-
void *HostPtr, pi_mem *RetMem) {
1911+
void *HostPtr, pi_mem *RetMem,
1912+
const pi_mem_properties *properties) {
19121913

19131914
// TODO: implement read-only, write-only
19141915
assert((Flags & PI_MEM_FLAGS_ACCESS_RW) != 0);
19151916
assert(Context);
19161917
assert(RetMem);
1918+
assert(properties == nullptr &&
1919+
"no mem properties goes to Level-Zero RT yet");
19171920

19181921
void *Ptr;
19191922
ze_device_handle_t ZeDevice = Context->Devices[0]->ZeDevice;

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL";
5353
CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL";
5454
CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL";
5555
CONSTFIX char clMemFreeName[] = "clMemFreeINTEL";
56+
CONSTFIX char clCreateBufferWithPropertiesName[] =
57+
"clCreateBufferWithPropertiesINTEL";
5658
CONSTFIX char clSetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL";
5759
CONSTFIX char clEnqueueMemsetName[] = "clEnqueueMemsetINTEL";
5860
CONSTFIX char clEnqueueMemcpyName[] = "clEnqueueMemcpyINTEL";
@@ -516,12 +518,25 @@ pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle,
516518
}
517519

518520
pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
519-
void *host_ptr, pi_mem *ret_mem) {
521+
void *host_ptr, pi_mem *ret_mem,
522+
const pi_mem_properties *properties) {
520523
pi_result ret_err = PI_INVALID_OPERATION;
521-
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
522-
cast<cl_mem_flags>(flags), size,
523-
host_ptr, cast<cl_int *>(&ret_err)));
524-
524+
clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr;
525+
526+
if (properties)
527+
// First we need to look up the function pointer
528+
ret_err = getExtFuncFromContext<clCreateBufferWithPropertiesName,
529+
clCreateBufferWithPropertiesINTEL_fn>(
530+
context, &FuncPtr);
531+
532+
if (FuncPtr)
533+
*ret_mem = cast<pi_mem>(FuncPtr(cast<cl_context>(context), properties,
534+
cast<cl_mem_flags>(flags), size, host_ptr,
535+
cast<cl_int *>(&ret_err)));
536+
else
537+
*ret_mem = cast<pi_mem>(clCreateBuffer(cast<cl_context>(context),
538+
cast<cl_mem_flags>(flags), size,
539+
host_ptr, cast<cl_int *>(&ret_err)));
525540
return ret_err;
526541
}
527542

sycl/source/detail/memory_manager.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,9 @@ MemoryManager::allocateBufferObject(ContextImplPtr TargetContext, void *UserPtr,
171171

172172
RT::PiMem NewMem = nullptr;
173173
const detail::plugin &Plugin = TargetContext->getPlugin();
174-
Plugin.call<PiApiKind::piMemBufferCreate>(
175-
TargetContext->getHandleRef(), CreationFlags, Size, UserPtr, &NewMem);
174+
Plugin.call<PiApiKind::piMemBufferCreate>(TargetContext->getHandleRef(),
175+
CreationFlags, Size, UserPtr,
176+
&NewMem, nullptr);
176177
return NewMem;
177178
}
178179

sycl/test/plugins/sycl-ls-gpu-default.cpp

100755100644
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
// REQUIRES: gpu, level_zero
1+
// REQUIRES: gpu
22

3-
// RUN: sycl-ls --verbose >%t.default.out
3+
// RUN: env --unset=SYCL_BE sycl-ls --verbose >%t.default.out
44
// RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.default.out
55

6-
// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : 1.0
7-
// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : 1.0
6+
// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{.*}}Level-Zero
7+
// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{.*}}Level-Zero
88

99
//==-- sycl-ls-gpu-default.cpp - SYCL test for default selected gpu device -==//
1010
//
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// REQUIRES: gpu, level_zero
2+
3+
// RUN: sycl-ls --verbose >%t.default.out
4+
// RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.default.out
5+
6+
// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{.*}}Level-Zero
7+
// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{.*}}Level-Zero
8+
9+
//==-- sycl-ls-gpu-level-zero.cpp - Test Level-Zero selected gpu device ----==//
10+
//
11+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
12+
// See https://llvm.org/LICENSE.txt for license information.
13+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14+
//
15+
//===----------------------------------------------------------------------===//

sycl/test/plugins/sycl-ls-gpu-opencl.cpp

100755100644
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
// RUN: env SYCL_BE=PI_OPENCL sycl-ls --verbose >%t.opencl.out
44
// RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.opencl.out
55

6-
// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{[0-9]\.[0-9]}}
7-
// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{[0-9]\.[0-9]}}
6+
// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{.*}}OpenCL
7+
// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{.*}}OpenCL
88

9-
//==-- sycl-ls-gpu-opencl.cpp - SYCL test for discovered/selected devices -===//
9+
//==-- sycl-ls-gpu-opencl.cpp - SYCL test for selected OpenCL GPU device --===//
1010
//
1111
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1212
// See https://llvm.org/LICENSE.txt for license information.

sycl/tools/sycl-ls/sycl-ls.cpp

100755100644
Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,11 @@ static void printDeviceInfo(const device &Device, const std::string &Prepend) {
7171
std::cout << Prepend << "Vendor : " << DeviceVendor << std::endl;
7272
std::cout << Prepend << "Driver : " << DeviceDriverVersion << std::endl;
7373
} else {
74-
std::cout << Prepend << DeviceTypeName << ": " << DeviceVersion << "[ "
75-
<< DeviceDriverVersion << " ]" << std::endl;
74+
auto DevicePlatform = Device.get_info<info::device::platform>();
75+
auto DevicePlatformName = DevicePlatform.get_info<info::platform::name>();
76+
std::cout << Prepend << DeviceTypeName << ": " << DevicePlatformName << " "
77+
<< DeviceVersion << " [" << DeviceDriverVersion << "]"
78+
<< std::endl;
7679
}
7780
}
7881

sycl/unittests/pi/EnqueueMemTest.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ class EnqueueMemTest : public testing::TestWithParam<detail::plugin> {
5050
_context, _device, 0, &_queue)),
5151
PI_SUCCESS);
5252

53-
ASSERT_EQ(
54-
(plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
55-
_context, PI_MEM_FLAGS_ACCESS_RW,
56-
_numElementsX * _numElementsY * sizeof(pi_int32), nullptr, &_mem)),
57-
PI_SUCCESS);
53+
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
54+
_context, PI_MEM_FLAGS_ACCESS_RW,
55+
_numElementsX * _numElementsY * sizeof(pi_int32), nullptr,
56+
&_mem, nullptr)),
57+
PI_SUCCESS);
5858
}
5959

6060
void TearDown() override {

sycl/unittests/pi/cuda/test_commands.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,10 @@ TEST_F(CudaCommandsTest, PIEnqueueReadBufferBlocking) {
7676
int output[memSize] = {};
7777

7878
pi_mem memObj;
79-
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
80-
context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj)),
81-
PI_SUCCESS);
79+
ASSERT_EQ(
80+
(plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
81+
context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)),
82+
PI_SUCCESS);
8283

8384
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(
8485
queue_, memObj, true, 0, bytes, data, 0, nullptr, nullptr)),
@@ -105,9 +106,10 @@ TEST_F(CudaCommandsTest, PIEnqueueReadBufferNonBlocking) {
105106
int output[memSize] = {};
106107

107108
pi_mem memObj;
108-
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
109-
context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj)),
110-
PI_SUCCESS);
109+
ASSERT_EQ(
110+
(plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
111+
context_, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj, nullptr)),
112+
PI_SUCCESS);
111113

112114
pi_event cpIn, cpOut;
113115
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piEnqueueMemBufferWrite>(

sycl/unittests/pi/cuda/test_kernels.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ TEST_F(CudaKernelsTest, PIKernelSetMemObj) {
240240
size_t memSize = 1024u;
241241
pi_mem memObj;
242242
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
243-
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj)),
243+
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
244+
nullptr)),
244245
PI_SUCCESS);
245246

246247
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piKernelSetArg>(
@@ -274,7 +275,8 @@ TEST_F(CudaKernelsTest, PIkerneldispatch) {
274275
size_t memSize = 1024u;
275276
pi_mem memObj;
276277
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
277-
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj)),
278+
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
279+
nullptr)),
278280
PI_SUCCESS);
279281

280282
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piextKernelSetArgMemObj>(
@@ -316,12 +318,14 @@ TEST_F(CudaKernelsTest, PIkerneldispatchTwo) {
316318
size_t memSize = 1024u;
317319
pi_mem memObj;
318320
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
319-
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj)),
321+
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj,
322+
nullptr)),
320323
PI_SUCCESS);
321324

322325
pi_mem memObj2;
323326
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piMemBufferCreate>(
324-
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj2)),
327+
context_, PI_MEM_FLAGS_ACCESS_RW, memSize, nullptr, &memObj2,
328+
nullptr)),
325329
PI_SUCCESS);
326330

327331
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piextKernelSetArgMemObj>(

0 commit comments

Comments
 (0)