From 298a1890cd6ceec1df1647feb02191c255b93496 Mon Sep 17 00:00:00 2001 From: Sergey Dmitriev Date: Tue, 1 Feb 2022 03:10:58 -0800 Subject: [PATCH 1/2] [SYCL][ESIMD] Add tests for lsc mem access APIs Signed-off-by: Sergey Dmitriev --- SYCL/ESIMD/lsc/Inputs/common.hpp | 18 ++ SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp | 147 ++++++++++++++++ SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp | 134 ++++++++++++++ SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp | 160 +++++++++++++++++ SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp | 165 ++++++++++++++++++ SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp | 142 +++++++++++++++ SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp | 130 ++++++++++++++ SYCL/ESIMD/lsc/lsc_fence_pvc.cpp | 87 +++++++++ SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp | 91 ++++++++++ .../lsc/lsc_flat_atomic_cachehint_pvc.cpp | 112 ++++++++++++ SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_flat_pvc.cpp | 102 +++++++++++ SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_slm_pvc.cpp | 111 ++++++++++++ SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_surf_pvc.cpp | 108 ++++++++++++ SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp | 32 ++++ SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp | 38 ++++ SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp | 32 ++++ 37 files changed, 2347 insertions(+) create mode 100644 SYCL/ESIMD/lsc/Inputs/common.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp create mode 100644 SYCL/ESIMD/lsc/lsc_fence_pvc.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_pvc.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_pvc.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_pvc.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp diff --git a/SYCL/ESIMD/lsc/Inputs/common.hpp b/SYCL/ESIMD/lsc/Inputs/common.hpp new file mode 100644 index 0000000000..923f872970 --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/common.hpp @@ -0,0 +1,18 @@ +//==------- common.hpp - DPC++ ESIMD on-device test ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +template class KernelID; + +template T get_rand() { + T v = rand(); + if constexpr (sizeof(T) > 4) + v = (v << 32) | rand(); + return v; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp new file mode 100644 index 0000000000..0e7b5214f9 --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp @@ -0,0 +1,147 @@ +//==------- lsc_flat_load.hpp - DPC++ ESIMD on-device test -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +template +bool test(uint32_t pmask = 0xffffffff) { + static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); + if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { + static_assert(!transpose, "Conversion types may not use vector"); + static_assert(VS == 1, "Only D32 and D64 support vector load"); + } + + static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); + static_assert(sizeof(T) >= 4, + "D8 and D16 are valid only in 2D block load/store"); + + uint16_t Size = Groups * Threads * VL * VS; + + T vmask = (T)-1; + if constexpr (DS == lsc_data_size::u8u32) + vmask = (T)0xff; + if constexpr (DS == lsc_data_size::u16u32) + vmask = (T)0xffff; + if constexpr (DS == lsc_data_size::u16u32h) + vmask = (T)0xffff0000; + + T old_val = get_rand(); + T zero_val = (T)0; + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + out[i] = old_val; + + T *in = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + in[i] = get_rand(); + + std::vector p(VL, 0); + if constexpr (!transpose) + for (int i = 0; i < VL; i++) + p[i] = (pmask >> i) & 1; + + try { + buffer bufp(p.data(), p.size()); + + auto e = q.submit([&](handler &cgh) { + auto accp = bufp.template get_access(cgh); + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + uint16_t globalID = ndi.get_global_id(0); + uint32_t elem_off = globalID * VL * VS; + uint32_t byte_off = elem_off * sizeof(T); + + if constexpr (transpose) { + auto vals = lsc_block_load(in + elem_off); + lsc_block_store( + out + elem_off, vals); + } else { + simd offset(byte_off, VS * sizeof(T)); + simd pred = lsc_block_load(accp, 0); + + auto loaded = + lsc_gather(in, offset, pred); + if constexpr (DS == lsc_data_size::u8u32 || + DS == lsc_data_size::u16u32) + loaded &= vmask; + + simd vals(old_val); + simd mask(0); + for (int i = 0; i < VS; i++) + mask.template select(i * VL) = pred; + vals.merge(loaded, mask); + + lsc_scatter(out, offset, vals); + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + sycl::free(out, ctx); + sycl::free(in, ctx); + return false; + } + + bool passed = true; + + if constexpr (transpose) { + for (int i = 0; i < Size; i++) { + T e = in[i]; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } else { + for (int i = 0; i < Size; i++) { + T e = (pmask >> ((i / VS) % VL)) & 1 ? in[i] & vmask : old_val; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + sycl::free(out, ctx); + sycl::free(in, ctx); + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp new file mode 100644 index 0000000000..317322a15f --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp @@ -0,0 +1,134 @@ +//==------- lsc_flat_store.hpp - DPC++ ESIMD on-device test ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +template +bool test(uint32_t pmask = 0xffffffff) { + static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); + if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { + static_assert(!transpose, "Conversion types may not use vector"); + static_assert(VS == 1, "Only D32 and D64 support vector load"); + } + + static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); + static_assert(sizeof(T) >= 4, + "D8 and D16 are valid only in 2D block load/store"); + + uint16_t Size = Groups * Threads * VL * VS; + + T vmask = (T)-1; + if constexpr (DS == lsc_data_size::u8u32) + vmask = (T)0xff; + if constexpr (DS == lsc_data_size::u16u32) + vmask = (T)0xffff; + if constexpr (DS == lsc_data_size::u16u32h) + vmask = (T)0xffff0000; + + T old_val = get_rand(); + T new_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + out[i] = old_val; + + std::vector p(VL, 0); + if constexpr (!transpose) + for (int i = 0; i < VL; i++) + p[i] = (pmask >> i) & 1; + + try { + buffer bufp(p.data(), p.size()); + + auto e = q.submit([&](handler &cgh) { + auto accp = bufp.template get_access(cgh); + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + uint16_t globalID = ndi.get_global_id(0); + uint32_t elem_off = globalID * VL * VS; + uint32_t byte_off = elem_off * sizeof(T); + + if constexpr (transpose) { + simd vals(new_val + elem_off, 1); + lsc_block_store(out + elem_off, vals); + } else { + simd offset(byte_off, VS * sizeof(T)); + simd pred = lsc_block_load(accp, 0); + + T val = new_val + elem_off; + simd vals; + for (int i = 0; i < VL; i++) + for (int j = 0; j < VS; j++) + vals.template select<1, 1>(i + j * VL) = val++; + + lsc_scatter(out, offset, vals, pred); + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + sycl::free(out, ctx); + return false; + } + + bool passed = true; + + if constexpr (transpose) { + for (int i = 0; i < Size; i++) { + T e = new_val + i; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } else { + for (int i = 0; i < Size; i++) { + T e = (pmask >> ((i / VS) % VL)) & 1 + ? ((new_val + i) & vmask) | (old_val & ~vmask) + : old_val; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + sycl::free(out, ctx); + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp new file mode 100644 index 0000000000..881f2a544b --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp @@ -0,0 +1,160 @@ +//==------- lsc_slm_load.hpp - DPC++ ESIMD on-device test ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +template +bool test(uint32_t pmask = 0xffffffff) { + static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); + if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { + static_assert(!transpose, "Conversion types may not use vector"); + static_assert(VS == 1, "Only D32 and D64 support vector load"); + } + + static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); + static_assert(sizeof(T) >= 4, + "D8 and D16 are valid only in 2D block load/store"); + + uint16_t Size = Groups * Threads * VL * VS; + + T vmask = (T)-1; + if constexpr (DS == lsc_data_size::u8u32) + vmask = (T)0xff; + if constexpr (DS == lsc_data_size::u16u32) + vmask = (T)0xffff; + if constexpr (DS == lsc_data_size::u16u32h) + vmask = (T)0xffff0000; + + T old_val = get_rand(); + T new_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + out[i] = 0; + + std::vector p(VL, 0); + if constexpr (!transpose) + for (int i = 0; i < VL; i++) + p[i] = (pmask >> i) & 1; + + try { + buffer bufp(p.data(), p.size()); + + auto e = q.submit([&](handler &cgh) { + auto accp = bufp.template get_access(cgh); + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + constexpr uint16_t gran = 4; // using oword write x1 to init SLM + constexpr uint16_t slm_blocks = // number of owords per Thread + VL * VS / gran + ((VL * VS) % gran ? 1 : 0); + constexpr uint16_t slm_ud_per_group = slm_blocks * Threads * gran; + constexpr uint16_t slm_size_per_group = + slm_ud_per_group * sizeof(T); + constexpr uint16_t slm_size = slm_size_per_group * Groups; + + uint16_t globalID = ndi.get_global_id(0); + uint32_t elem_off = globalID * VL * VS; + uint32_t byte_off = elem_off * sizeof(T); + + slm_init(slm_size); + if (ndi.get_local_id(0) == 0) { + uint32_t groupID = ndi.get_group(0); + uint32_t group_off = groupID * slm_size_per_group; + simd slm_val(new_val + groupID * slm_ud_per_group, 1); + for (int i = 0; i < slm_size_per_group; i += gran * sizeof(T)) { + slm_block_store(i + group_off, slm_val); + slm_val += gran; + } + } + + barrier(); + + if constexpr (transpose) { + auto vals = lsc_slm_block_load(byte_off); + lsc_block_store(out + elem_off, vals); + } else { + simd pred = lsc_block_load(accp, 0); + simd offset(byte_off, VS * sizeof(T)); + + auto loaded = + lsc_slm_gather(offset, pred); + + if constexpr (DS == lsc_data_size::u8u32 || + DS == lsc_data_size::u16u32) + loaded &= vmask; + + simd vals(old_val); + for (int i = 0; i < VS; i++) + vals.template select(i * VL).merge( + loaded.template select(i * VL), pred); + + lsc_scatter(out, offset, vals); + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + sycl::free(out, ctx); + return false; + } + + bool passed = true; + + if constexpr (transpose) { + for (int i = 0; i < Size; i++) { + T e = new_val + i; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } else { + for (int i = 0; i < Size; i++) { + T e = (pmask >> ((i / VS) % VL)) & 1 ? (new_val + i) & vmask : old_val; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + sycl::free(out, ctx); + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp new file mode 100644 index 0000000000..141d13b40c --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp @@ -0,0 +1,165 @@ +//==------- lsc_slm_store.hpp - DPC++ ESIMD on-device test -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +template +bool test(uint32_t pmask = 0xffffffff) { + static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); + if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { + static_assert(!transpose, "Conversion types may not use vector"); + static_assert(VS == 1, "Only D32 and D64 support vector load"); + } + + static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); + static_assert(sizeof(T) >= 4, + "D8 and D16 are valid only in 2D block load/store"); + + uint16_t Size = Groups * Threads * VL * VS; + + T vmask = (T)-1; + if constexpr (DS == lsc_data_size::u8u32) + vmask = (T)0xff; + if constexpr (DS == lsc_data_size::u16u32) + vmask = (T)0xffff; + if constexpr (DS == lsc_data_size::u16u32h) + vmask = (T)0xffff0000; + + T old_val = get_rand(); + T new_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + out[i] = 0; + + std::vector p(VL, 0); + if constexpr (!transpose) + for (int i = 0; i < VL; i++) + p[i] = (pmask >> i) & 1; + + try { + buffer bufp(p.data(), p.size()); + + auto e = q.submit([&](handler &cgh) { + auto accp = bufp.template get_access(cgh); + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + constexpr uint16_t gran = 4; // using oword write x1 to init SLM + constexpr uint16_t slm_blocks = // number of owords per Thread + VL * VS / gran + ((VL * VS) % gran ? 1 : 0); + constexpr uint16_t slm_ud_per_group = slm_blocks * Threads * gran; + constexpr uint16_t slm_size_per_group = + slm_ud_per_group * sizeof(T); + constexpr uint16_t slm_size = slm_size_per_group * Groups; + + uint16_t globalID = ndi.get_global_id(0); + uint32_t elem_off = globalID * VL * VS; + uint32_t byte_off = elem_off * sizeof(T); + + slm_init(slm_size); + if (ndi.get_local_id(0) == 0) { + uint32_t group_off = ndi.get_group(0) * slm_size_per_group; + simd slm_val(old_val); + for (int i = 0; i < slm_size_per_group; i += gran * sizeof(T)) + slm_block_store(i + group_off, slm_val); + } + + barrier(); + + if constexpr (transpose) { + simd vals(new_val + elem_off, 1); + lsc_slm_block_store(byte_off, vals); + + barrier(); + + auto ret = lsc_slm_block_load(byte_off); + lsc_block_store(out + elem_off, ret); + } else { + T val = new_val + elem_off; + simd vals; + for (int i = 0; i < VL; i++) + for (int j = 0; j < VS; j++) + vals.template select<1, 1>(i + j * VL) = val++; + + simd pred = lsc_block_load(accp, 0); + simd offset(byte_off, VS * sizeof(T)); + + lsc_slm_scatter(offset, vals, pred); + + barrier(); + + auto ret = lsc_slm_gather( + offset); + lsc_scatter(out, offset, ret); + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + sycl::free(out, ctx); + return false; + } + + bool passed = true; + + if constexpr (transpose) { + for (int i = 0; i < Size; i++) { + T e = new_val + i; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } else { + for (int i = 0; i < Size; i++) { + T e = (pmask >> ((i / VS) % VL)) & 1 + ? ((new_val + i) & vmask) | (old_val & ~vmask) + : old_val; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + sycl::free(out, ctx); + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp new file mode 100644 index 0000000000..df61743adc --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp @@ -0,0 +1,142 @@ +//==------- lsc_surf_load.hpp - DPC++ ESIMD on-device test -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +template +bool test(uint32_t pmask = 0xffffffff) { + static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); + if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { + static_assert(!transpose, "Conversion types may not use vector"); + static_assert(VS == 1, "Only D32 and D64 support vector load"); + } + + static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); + static_assert(sizeof(T) >= 4, + "D8 and D16 are valid only in 2D block load/store"); + + uint16_t Size = Groups * Threads * VL * VS; + + T vmask = (T)-1; + if constexpr (DS == lsc_data_size::u8u32) + vmask = (T)0xff; + if constexpr (DS == lsc_data_size::u16u32) + vmask = (T)0xffff; + if constexpr (DS == lsc_data_size::u16u32h) + vmask = (T)0xffff0000; + + T old_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + std::vector out(Size, old_val); + std::vector in(Size); + for (int i = 0; i < Size; i++) + in[i] = get_rand(); + + std::vector p(VL, 0); + if constexpr (!transpose) + for (int i = 0; i < VL; i++) + p[i] = (pmask >> i) & 1; + + try { + buffer bufo(out.data(), out.size()); + buffer bufi(in.data(), in.size()); + buffer bufp(p.data(), p.size()); + + auto e = q.submit([&](handler &cgh) { + auto acco = bufo.template get_access(cgh); + auto acci = bufi.template get_access(cgh); + auto accp = bufp.template get_access(cgh); + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + uint16_t globalID = ndi.get_global_id(0); + uint32_t elem_off = globalID * VL * VS; + uint32_t byte_off = elem_off * sizeof(T); + + if constexpr (transpose) { + auto vals = lsc_block_load(acci, byte_off); + lsc_block_store( + acco, byte_off, vals); + } else { + simd offset(byte_off, VS * sizeof(T)); + simd pred = lsc_block_load(accp, 0); + + auto loaded = + lsc_gather(acci, offset, pred); + if constexpr (DS == lsc_data_size::u8u32 || + DS == lsc_data_size::u16u32) + loaded &= vmask; + + simd vals(old_val); + simd mask; + for (int i = 0; i < VS; i++) + mask.template select(i * VL) = pred; + vals.merge(loaded, mask); + + lsc_scatter(acco, offset, vals); + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + return false; + } + + bool passed = true; + + if constexpr (transpose) { + for (int i = 0; i < Size; i++) { + if (out[i] != in[i]) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)in[i] << std::dec + << std::endl; + } + } + } else { + for (int i = 0; i < Size; i++) { + T e = (pmask >> ((i / VS) % VL)) & 1 ? in[i] & vmask : old_val; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp new file mode 100644 index 0000000000..8c8284111d --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp @@ -0,0 +1,130 @@ +//==------- lsc_surf_store.hpp - DPC++ ESIMD on-device test ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +template +bool test(uint32_t pmask = 0xffffffff) { + static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); + if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { + static_assert(!transpose, "Conversion types may not use vector"); + static_assert(VS == 1, "Only D32 and D64 support vector load"); + } + + static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); + static_assert(sizeof(T) >= 4, + "D8 and D16 are valid only in 2D block load/store"); + + uint16_t Size = Groups * Threads * VL * VS; + + T vmask = (T)-1; + if constexpr (DS == lsc_data_size::u8u32) + vmask = (T)0xff; + if constexpr (DS == lsc_data_size::u16u32) + vmask = (T)0xffff; + if constexpr (DS == lsc_data_size::u16u32h) + vmask = (T)0xffff0000; + + T old_val = get_rand(); + T new_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + std::vector out(Size, old_val); + std::vector p(VL, 0); + if constexpr (!transpose) + for (int i = 0; i < VL; i++) + p[i] = (pmask >> i) & 1; + + try { + buffer bufo(out.data(), out.size()); + buffer bufp(p.data(), p.size()); + + auto e = q.submit([&](handler &cgh) { + auto acco = bufo.template get_access(cgh); + auto accp = bufp.template get_access(cgh); + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + uint16_t globalID = ndi.get_global_id(0); + uint32_t elem_off = globalID * VL * VS; + uint32_t byte_off = elem_off * sizeof(T); + + if constexpr (transpose) { + simd vals(new_val + elem_off, 1); + lsc_block_store(acco, byte_off, vals); + } else { + simd offset(byte_off, VS * sizeof(T)); + simd pred = lsc_block_load(accp, 0); + + T val = new_val + elem_off; + simd vals; + for (int i = 0; i < VL; i++) + for (int j = 0; j < VS; j++) + vals.template select<1, 1>(i + j * VL) = val++; + + lsc_scatter(acco, offset, vals, pred); + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + return false; + } + + bool passed = true; + + if constexpr (transpose) { + for (int i = 0; i < Size; i++) { + T e = new_val + i; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } else { + for (int i = 0; i < Size; i++) { + T e = (pmask >> ((i / VS) % VL)) & 1 + ? ((new_val + i) & vmask) | (old_val & ~vmask) + : old_val; + if (out[i] != e) { + passed = false; + std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + return passed; +} diff --git a/SYCL/ESIMD/lsc/lsc_fence_pvc.cpp b/SYCL/ESIMD/lsc/lsc_fence_pvc.cpp new file mode 100644 index 0000000000..a927b5d212 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_fence_pvc.cpp @@ -0,0 +1,87 @@ +//==------- lsc_fence_pvc.cpp - DPC++ ESIMD on-device test -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test checks lsc_fence intrinsic. + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include + +int main() { + using namespace cl::sycl; + using namespace sycl::ext::intel::experimental::esimd; + auto size = size_t{512}; + unsigned constexpr SIMDSize = 8; + + queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()); + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + auto *res_vec = malloc_shared(size, q); + std::fill(res_vec, res_vec + size, 0); + + try { + q.submit([&](handler &h) { + h.parallel_for( + range<2>{size / SIMDSize, 2}, [=](id<2> id) SYCL_ESIMD_KERNEL { + // Basically this kernel is an example from wiki: + // https://en.wikipedia.org/wiki/Memory_barrier#Example + slm_init(8192); + auto offset = id[0] * SIMDSize; + auto byte_offset = offset * sizeof(int); + auto cond_offset = size * sizeof(int) + byte_offset; + if (id[1] % 2 == 0) { + // First thread: write data and condition + // and provoke gpu to reorder instructions + auto data = simd(offset, 1); + lsc_slm_block_store(byte_offset, data * 10); + lsc_slm_block_store(byte_offset, data * 5); + lsc_slm_block_store(byte_offset, data); + // Protect from reordering for the last two instructions + lsc_fence(); + lsc_slm_block_store(cond_offset, + simd(1)); + } else { + auto condition = simd(0); + int imax = 1000; + int i = 0; + while (condition[0] == 0 && i < imax) { + condition = lsc_slm_block_load(cond_offset); + ++i; + } + // Protect from reordering for the while cycle and data read + lsc_fence(); + auto data = lsc_slm_block_load(byte_offset); + lsc_block_store(res_vec + offset, data); + } + }); + }); + q.wait(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + free(res_vec, q); + return 1; + } + + auto error = 0; + for (auto i = 0; i != size; ++i) { + error += std::abs(res_vec[i] - i); + } + std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + free(res_vec, q); + return error; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp b/SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp new file mode 100644 index 0000000000..681c731d09 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp @@ -0,0 +1,91 @@ +//==------- lsc_flat_2d_pvc.cpp - DPC++ ESIMD on-device test ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test checks 2d flat lsc intrinsics + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include +#include + +int main() { + using namespace cl::sycl; + using namespace sycl::ext::intel::experimental::esimd; + unsigned data_height = 4; + unsigned data_width = 9; + unsigned data_pitch = 16; + unsigned x = 0; + unsigned y = 0; + unsigned size = data_height * data_pitch; + + queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()); + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + auto *input = malloc_shared(size, q); + std::iota(input, input + size, 0); + + constexpr unsigned Width = 4; + constexpr unsigned Height = 4; + constexpr unsigned NumBlocks = 1; + auto *block_store = malloc_shared(size, q); + + auto *ref = new int[size]; + // Fill dst and ref data which is untouched with random values + for (int i = 0; i < size; i++) + block_store[i] = ref[i] = rand() % 128; + + for (int i = 0; i < Height; i++) { + for (int j = 0; j < Width; j++) { + ref[y * data_pitch + i * data_pitch + x + j] = + input[y * data_pitch + i * data_pitch + x + j]; + } + } + try { + q.submit([&](handler &h) { + h.parallel_for( + range<1>{1}, [=](id<1> id) SYCL_ESIMD_KERNEL { + lsc_prefetch2d( + input, (data_width * sizeof(int)) - 1, data_height - 1, + (data_pitch * sizeof(int)) - 1, x, y); + auto data = lsc_load2d( + input, (data_width * sizeof(int)) - 1, data_height - 1, + (data_pitch * sizeof(int)) - 1, x, y); + lsc_store2d( + block_store, (data_width * sizeof(int)) - 1, data_height - 1, + (data_pitch * sizeof(int)) - 1, x, y, data); + }); + }); + q.wait(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + free(input, q); + free(block_store, q); + return 1; + } + + auto error = 0; + for (auto i = 0; i < size; ++i) + error += std::abs(ref[i] - block_store[i]); + free(input, q); + free(block_store, q); + std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + return error; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp b/SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp new file mode 100644 index 0000000000..01e709e0b9 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp @@ -0,0 +1,112 @@ +//==---- lsc_flat_atomic_cachehint_pvc.cpp - DPC++ ESIMD on-device test ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -DESIMD_GEN12_7 -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include + +class Test; + +#define DTYPE float + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +ESIMD_INLINE void atomic_add_float(DTYPE *sA, simd_mask<16> M) { + simd offsets( + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + simd mat({0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, + 0.5, 0.5, 0.5, 0.5, 0.5}); + lsc_atomic_update( + (float *)sA, offsets * sizeof(float), mat, M); +} + +int main(void) { + constexpr unsigned Size = 256; + constexpr unsigned VL = 16; + constexpr size_t LocalRange = 4; + constexpr size_t GlobalRange = 64; + + queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()); + + auto dev = q.get_device(); + std::cout << "Running on " << dev.get_info() << "\n"; + auto ctxt = q.get_context(); + + DTYPE *A = malloc_shared(VL, q); + DTYPE *B = malloc_shared(VL, q); + DTYPE *C = malloc_shared(VL, q); + DTYPE *D = malloc_shared(VL, q); + + for (unsigned i = 0; i < VL; ++i) { + A[i] = 0; + B[i] = GlobalRange * 0.5; // expect changes in all elements + C[i] = 0; + D[i] = (i & 1) ? B[i] : 0; // expect changes in elements with odd indices + } + + nd_range<1> Range(range<1>{GlobalRange}, range<1>{LocalRange}); + + std::vector kernelId1 = {get_kernel_id()}; + setenv("SYCL_PROGRAM_COMPILE_OPTIONS", "-vc-codegen -doubleGRF", 1); + auto inputBundle1 = get_kernel_bundle(ctxt, kernelId1); + auto exeBundle1 = build(inputBundle1); + try { + q.submit([&](handler &cgh) { + cgh.use_kernel_bundle(exeBundle1); + cgh.parallel_for(Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL { + atomic_add_float(A, 1); + simd_mask<16> M({0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}); + atomic_add_float(C, M); + }); + }).wait(); + } catch (sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + free(A, q); + free(B, q); + free(C, q); + free(D, q); + return 1; + } + + int err_cnt = 0; + for (unsigned i = 0; i < VL; ++i) { + if (A[i] != B[i]) { + if (++err_cnt < 10) + std::cerr << "A == B failed at " << i << ": " << A[i] << " != " << B[i] + << "\n"; + } + if (C[i] != D[i]) { + if (++err_cnt < 10) + std::cerr << "C == D failed at " << i << ": " << C[i] << " != " << D[i] + << "\n"; + } + } + if (err_cnt > 0) { + std::cout << " pass rate: " << ((float)(VL - err_cnt) / (float)VL) * 100.0f + << "% (" << (Size - err_cnt) << "/" << Size << ")\n"; + } + + std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n"); + free(A, q); + free(B, q); + free(C, q); + free(D, q); + + return err_cnt > 0 ? 1 : 0; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp new file mode 100644 index 0000000000..1eaa79411c --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_flat_load_u16u32.cpp - DPC++ ESIMD on-device test ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_load.hpp" + +constexpr uint32_t seed = 186; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp new file mode 100644 index 0000000000..119519bcba --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp @@ -0,0 +1,38 @@ +//==------- lsc_flat_load_u32.cpp - DPC++ ESIMD on-device test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_load.hpp" + +constexpr uint32_t seed = 188; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp new file mode 100644 index 0000000000..22b5cd700e --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp @@ -0,0 +1,38 @@ +//==------- lsc_flat_load_u64.cpp - DPC++ ESIMD on-device test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_load.hpp" + +constexpr uint32_t seed = 187; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp new file mode 100644 index 0000000000..0259755c47 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_flat_load_u8u32.cpp - DPC++ ESIMD on-device test -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_load.hpp" + +constexpr uint32_t seed = 185; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_pvc.cpp b/SYCL/ESIMD/lsc/lsc_flat_pvc.cpp new file mode 100644 index 0000000000..9252a584e4 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_pvc.cpp @@ -0,0 +1,102 @@ +//==------- lsc_flat_pvc.cpp - DPC++ ESIMD on-device test ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test checks 1d flat lsc intrinsics + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include + +int main() { + using namespace cl::sycl; + using namespace sycl::ext::intel::experimental::esimd; + auto size = size_t{128}; + auto constexpr SIMDSize = unsigned{4}; + + auto q = + queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()}; + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + auto *vec_0 = malloc_shared(size, q); + auto *vec_1 = malloc_shared(size, q); + auto *vec_2 = malloc_shared(size, q); + auto *vec_3 = malloc_shared(size, q); + auto *vec_4 = malloc_shared(size, q); + std::iota(vec_0, vec_0 + size, 0); + std::iota(vec_1, vec_1 + size, 0); + std::iota(vec_2, vec_2 + size, 0); + std::iota(vec_3, vec_3 + size, 0); + std::iota(vec_4, vec_4 + size, 0); + + try { + q.submit([&](handler &h) { + h.parallel_for( + range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { + auto offset = id[0] * SIMDSize; + auto offsets = simd(id * SIMDSize * sizeof(int), + sizeof(int)); + auto pred = simd_mask(1); + auto add = simd(5); + auto compare = simd(id * SIMDSize, 1); + auto swap = compare * 2; + + lsc_prefetch(vec_0 + + offset); + auto data_0 = lsc_block_load(vec_0 + offset); + lsc_block_store(vec_0 + offset, data_0 * 2); + + lsc_prefetch(vec_1, + offsets); + auto data_1 = lsc_gather(vec_1, offsets); + lsc_scatter(vec_1, offsets, data_1 * 2); + + lsc_atomic_update(vec_2, offsets, pred); + lsc_atomic_update(vec_3, offsets, add, pred); + lsc_atomic_update(vec_4, offsets, compare, + swap, pred); + }); + }); + q.wait(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + sycl::free(vec_0, q); + sycl::free(vec_1, q); + sycl::free(vec_2, q); + sycl::free(vec_3, q); + sycl::free(vec_4, q); + return 1; + } + + auto error = 0; + for (auto i = 0; i != size; ++i) { + error += std::abs(vec_0[i] - 2 * i); + error += std::abs(vec_1[i] - 2 * i); + error += std::abs(vec_2[i] - (i + 1)); + error += std::abs(vec_3[i] - (i + 5)); + error += std::abs(vec_4[i] - (i * 2)); + } + sycl::free(vec_0, q); + sycl::free(vec_1, q); + sycl::free(vec_2, q); + sycl::free(vec_3, q); + sycl::free(vec_4, q); + std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + return error; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp new file mode 100644 index 0000000000..d64e8f219e --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_flat_store_u16u32.cpp - DPC++ ESIMD on-device test ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUNx: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_store.hpp" + +constexpr uint32_t seed = 286; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp new file mode 100644 index 0000000000..781125b323 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp @@ -0,0 +1,38 @@ +//==------- lsc_flat_store_u32.cpp - DPC++ ESIMD on-device test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_store.hpp" + +constexpr uint32_t seed = 288; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp b/SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp new file mode 100644 index 0000000000..2c11cc5a77 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp @@ -0,0 +1,38 @@ +//==------- lsc_flat_store_u64.cpp - DPC++ ESIMD on-device test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_store.hpp" + +constexpr uint32_t seed = 287; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp new file mode 100644 index 0000000000..22c81a7e05 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_flat_store_u8u32.cpp - DPC++ ESIMD on-device test ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_flat_store.hpp" + +constexpr uint32_t seed = 285; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp new file mode 100644 index 0000000000..f7133f3055 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_slm_load_u16u32.cpp - DPC++ ESIMD on-device test -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_load.hpp" + +constexpr uint32_t seed = 175; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp new file mode 100644 index 0000000000..d923b41ab7 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp @@ -0,0 +1,38 @@ +//==------- lsc_slm_load_u32.cpp - DPC++ ESIMD on-device test --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUNx: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_load.hpp" + +constexpr uint32_t seed = 177; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp new file mode 100644 index 0000000000..4655a3d752 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp @@ -0,0 +1,38 @@ +//==------- lsc_slm_load_u64.cpp - DPC++ ESIMD on-device test --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUNx: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_load.hpp" + +constexpr uint32_t seed = 176; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint64_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint64_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint64_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint64_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint64_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint64_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint64_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint64_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint64_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint64_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint64_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp new file mode 100644 index 0000000000..33875a4d02 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_slm_load_u8u32.cpp - DPC++ ESIMD on-device test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUNx: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_load.hpp" + +constexpr uint32_t seed = 174; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_pvc.cpp b/SYCL/ESIMD/lsc/lsc_slm_pvc.cpp new file mode 100644 index 0000000000..a3ed61ba58 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_pvc.cpp @@ -0,0 +1,111 @@ +//==------- lsc_slm_pvc.cpp - DPC++ ESIMD on-device test -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test checks 1d slm lsc intrinsics + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include + +int main() { + using namespace cl::sycl; + using namespace sycl::ext::intel::experimental::esimd; + auto size = size_t{128}; + auto constexpr SIMDSize = unsigned{4}; + + auto q = + queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()}; + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + auto vec_0 = std::vector(size); + auto vec_1 = std::vector(size); + auto vec_2 = std::vector(size); + auto vec_3 = std::vector(size); + auto vec_4 = std::vector(size); + auto buf_0 = buffer{vec_0}; + auto buf_1 = buffer{vec_1}; + auto buf_2 = buffer{vec_2}; + auto buf_3 = buffer{vec_3}; + auto buf_4 = buffer{vec_4}; + + try { + q.submit([&](handler &h) { + auto access_0 = buf_0.template get_access(h); + auto access_1 = buf_1.template get_access(h); + auto access_2 = buf_2.template get_access(h); + auto access_3 = buf_3.template get_access(h); + auto access_4 = buf_4.template get_access(h); + h.parallel_for( + range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { + auto offset = id * SIMDSize * sizeof(int); + auto offsets = simd(id * SIMDSize * sizeof(int), + sizeof(int)); + auto data = simd(id * SIMDSize, 1); + auto pred = simd_mask(1); + auto add = simd(5); + auto compare = simd(id * SIMDSize, 1); + auto swap = compare * 2; + + slm_init(4096); + lsc_slm_block_store(offset, data * 2); + auto data_0 = lsc_slm_block_load(offset); + lsc_block_store(access_0, offset, data_0); + + lsc_slm_scatter(offsets, data * 2); + auto data_1 = lsc_slm_gather(offsets); + lsc_block_store(access_1, offset, data_1); + + lsc_slm_block_store(offset, data); + lsc_slm_atomic_update(offsets, pred); + auto data_2 = lsc_slm_block_load(offset); + lsc_block_store(access_2, offset, data_2); + + lsc_slm_block_store(offset, data); + lsc_slm_atomic_update(offsets, add, pred); + auto data_3 = lsc_slm_block_load(offset); + lsc_block_store(access_3, offset, data_3); + + lsc_slm_block_store(offset, data); + lsc_slm_atomic_update(offsets, compare, + swap, pred); + auto data_4 = lsc_slm_block_load(offset); + lsc_block_store(access_4, offset, data_4); + }); + }); + q.wait(); + buf_0.template get_access(); + buf_1.template get_access(); + buf_2.template get_access(); + buf_3.template get_access(); + buf_4.template get_access(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + return 1; + } + + auto error = 0; + for (auto i = 0; i != size; ++i) { + error += std::abs(vec_0[i] - (i * 2)); + error += std::abs(vec_1[i] - (i * 2)); + error += std::abs(vec_2[i] - (i + 1)); + error += std::abs(vec_3[i] - (i + 5)); + error += std::abs(vec_4[i] - (i * 2)); + } + std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + return error; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp new file mode 100644 index 0000000000..d4b3ba375a --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_slm_store_u16u32.cpp - DPC++ ESIMD on-device test ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_store.hpp" + +constexpr uint32_t seed = 275; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp new file mode 100644 index 0000000000..952e96b1b6 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp @@ -0,0 +1,38 @@ +//==------- lsc_slm_store_u32.cpp - DPC++ ESIMD on-device test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_store.hpp" + +constexpr uint32_t seed = 277; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp b/SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp new file mode 100644 index 0000000000..a40cfacf05 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp @@ -0,0 +1,38 @@ +//==------- lsc_slm_store_u64.cpp - DPC++ ESIMD on-device test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_store.hpp" + +constexpr uint32_t seed = 276; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint64_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint64_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint64_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint64_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint64_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint64_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint64_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint64_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint64_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint64_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint64_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp new file mode 100644 index 0000000000..8cedb81e5b --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_slm_store_u8u32.cpp - DPC++ ESIMD on-device test -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_slm_store.hpp" + +constexpr uint32_t seed = 274; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp new file mode 100644 index 0000000000..0107ccb62f --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_surf_load_u16u32.cpp - DPC++ ESIMD on-device test ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 197; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp new file mode 100644 index 0000000000..8cecc3da72 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp @@ -0,0 +1,38 @@ +//==------- lsc_surf_load_u32.cpp - DPC++ ESIMD on-device test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 199; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp new file mode 100644 index 0000000000..c226986c7c --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp @@ -0,0 +1,38 @@ +//==------- lsc_surf_load_u64.cpp - DPC++ ESIMD on-device test -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 198; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp new file mode 100644 index 0000000000..3800c56c17 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_surf_load_u8u32.cpp - DPC++ ESIMD on-device test -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 196; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_pvc.cpp b/SYCL/ESIMD/lsc/lsc_surf_pvc.cpp new file mode 100644 index 0000000000..6050d2f82e --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_pvc.cpp @@ -0,0 +1,108 @@ +//==------- lsc_surf_pvc.cpp - DPC++ ESIMD on-device test ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This test checks 1d surf lsc intrinsics + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -DESIMD_GEN12_7 -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "../esimd_test_utils.hpp" + +#include +#include +#include +#include +#include + +int main() { + using namespace cl::sycl; + using namespace sycl::ext::intel::experimental::esimd; + auto size = size_t{128}; + auto constexpr SIMDSize = unsigned{4}; + + auto q = + queue{esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()}; + auto device = q.get_device(); + std::cout << "Device name: " << device.get_info() + << std::endl; + + auto vec_0 = std::vector(size); + auto vec_1 = std::vector(size); + auto vec_2 = std::vector(size); + auto vec_3 = std::vector(size); + auto vec_4 = std::vector(size); + std::iota(vec_0.begin(), vec_0.end(), 0); + std::iota(vec_1.begin(), vec_1.end(), 0); + std::iota(vec_2.begin(), vec_2.end(), 0); + std::iota(vec_3.begin(), vec_3.end(), 0); + std::iota(vec_4.begin(), vec_4.end(), 0); + auto buf_0 = buffer{vec_0}; + auto buf_1 = buffer{vec_1}; + auto buf_2 = buffer{vec_2}; + auto buf_3 = buffer{vec_3}; + auto buf_4 = buffer{vec_4}; + + try { + q.submit([&](handler &h) { + auto access_0 = buf_0.template get_access(h); + auto access_1 = buf_1.template get_access(h); + auto access_2 = buf_2.template get_access(h); + auto access_3 = buf_3.template get_access(h); + auto access_4 = buf_4.template get_access(h); + h.parallel_for( + range<1>{size / SIMDSize}, [=](id<1> id) SYCL_ESIMD_KERNEL { + auto offset = id[0] * SIMDSize * sizeof(int); + auto offsets = simd(id * SIMDSize * sizeof(int), + sizeof(int)); + auto pred = simd_mask(1); + auto add = simd(5); + auto compare = simd(id * SIMDSize, 1); + auto swap = compare * 2; + + lsc_prefetch(access_0, + offset); + auto data_0 = lsc_block_load(access_0, offset); + lsc_block_store(access_0, offset, data_0 * 2); + + lsc_prefetch(access_1, + offsets); + auto data_1 = lsc_gather(access_1, offsets); + lsc_scatter(access_1, offsets, data_1 * 2); + + lsc_atomic_update(access_2, offsets, pred); + lsc_atomic_update(access_3, offsets, add, + pred); + lsc_atomic_update(access_4, offsets, + compare, swap, pred); + }); + }); + q.wait(); + buf_0.template get_access(); + buf_1.template get_access(); + buf_2.template get_access(); + buf_3.template get_access(); + buf_4.template get_access(); + } catch (sycl::exception e) { + std::cout << "SYCL exception caught: " << e.what(); + return 1; + } + + auto error = 0; + for (auto i = 0; i != size; ++i) { + error += std::abs(vec_0[i] - (i * 2)); + error += std::abs(vec_1[i] - (i * 2)); + error += std::abs(vec_2[i] - (i + 1)); + error += std::abs(vec_3[i] - (i + 5)); + error += std::abs(vec_4[i] - (i * 2)); + } + std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + return error; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp new file mode 100644 index 0000000000..788ea1397f --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_surf_store_u16u32.cpp - DPC++ ESIMD on-device test ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_store.hpp" + +constexpr uint32_t seed = 297; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp new file mode 100644 index 0000000000..f7222d53d2 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp @@ -0,0 +1,38 @@ +//==------- lsc_surf_store_u32.cpp - DPC++ ESIMD on-device test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_store.hpp" + +constexpr uint32_t seed = 299; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp new file mode 100644 index 0000000000..7320733866 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp @@ -0,0 +1,38 @@ +//==------- lsc_surf_store_u64.cpp - DPC++ ESIMD on-device test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_store.hpp" + +constexpr uint32_t seed = 298; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); + // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail + // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp new file mode 100644 index 0000000000..c991dfa0d1 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp @@ -0,0 +1,32 @@ +//==------- lsc_surf_store_u8u32.cpp - DPC++ ESIMD on-device test ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: gpu-intel-pvc +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_store.hpp" + +constexpr uint32_t seed = 296; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} From 497f51eb7efd1d3cb3e07acc2a97faebc2a309da Mon Sep 17 00:00:00 2001 From: Sergey Dmitriev Date: Wed, 30 Mar 2022 03:36:09 -0700 Subject: [PATCH 2/2] Applied suggestions from code review, updated tests after the recent API changes, etc --- SYCL/ESIMD/lsc/Inputs/lsc_block_load.hpp | 274 ++++++++++++++++++ SYCL/ESIMD/lsc/Inputs/lsc_block_store.hpp | 110 +++++++ SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp | 160 ---------- SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp | 165 ----------- SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp | 61 ++-- SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp | 19 +- .../{lsc_flat_load.hpp => lsc_usm_load.hpp} | 56 ++-- .../{lsc_flat_store.hpp => lsc_usm_store.hpp} | 21 +- SYCL/ESIMD/lsc/lsc_block_load_u16.cpp | 43 +++ SYCL/ESIMD/lsc/lsc_block_load_u32.cpp | 39 +++ SYCL/ESIMD/lsc/lsc_block_load_u64.cpp | 38 +++ SYCL/ESIMD/lsc/lsc_block_load_u8.cpp | 40 +++ SYCL/ESIMD/lsc/lsc_block_prefetch_u16.cpp | 50 ++++ SYCL/ESIMD/lsc/lsc_block_prefetch_u32.cpp | 48 +++ SYCL/ESIMD/lsc/lsc_block_prefetch_u64.cpp | 48 +++ SYCL/ESIMD/lsc/lsc_block_prefetch_u8.cpp | 50 ++++ SYCL/ESIMD/lsc/lsc_block_store_u16.cpp | 27 ++ SYCL/ESIMD/lsc/lsc_block_store_u32.cpp | 27 ++ SYCL/ESIMD/lsc/lsc_block_store_u8.cpp | 27 ++ SYCL/ESIMD/lsc/lsc_fence_pvc.cpp | 87 ------ SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp | 38 --- SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp | 32 -- SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp | 38 --- .../lsc/{lsc_slm_pvc.cpp => lsc_slm.cpp} | 21 +- SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp | 32 -- SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp | 38 --- SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp | 32 -- SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp | 38 --- SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp | 32 -- .../lsc/{lsc_surf_pvc.cpp => lsc_surf.cpp} | 31 +- SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp | 2 +- SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp | 7 +- SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp | 25 +- SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp | 2 +- SYCL/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp | 35 +++ SYCL/ESIMD/lsc/lsc_surf_prefetch_u32.cpp | 40 +++ SYCL/ESIMD/lsc/lsc_surf_prefetch_u64.cpp | 40 +++ SYCL/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp | 35 +++ SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp | 2 +- SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp | 7 +- SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp | 25 +- SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp | 2 +- .../lsc/{lsc_flat_pvc.cpp => lsc_usm.cpp} | 29 +- .../{lsc_flat_2d_pvc.cpp => lsc_usm_2d.cpp} | 17 +- ...t_pvc.cpp => lsc_usm_atomic_cachehint.cpp} | 15 +- ...oad_u16u32.cpp => lsc_usm_load_u16u32.cpp} | 6 +- ...flat_load_u32.cpp => lsc_usm_load_u32.cpp} | 11 +- ..._slm_load_u64.cpp => lsc_usm_load_u64.cpp} | 15 +- ..._load_u8u32.cpp => lsc_usm_load_u8u32.cpp} | 6 +- SYCL/ESIMD/lsc/lsc_usm_prefetch_u16u32.cpp | 35 +++ SYCL/ESIMD/lsc/lsc_usm_prefetch_u32.cpp | 40 +++ SYCL/ESIMD/lsc/lsc_usm_prefetch_u64.cpp | 40 +++ SYCL/ESIMD/lsc/lsc_usm_prefetch_u8u32.cpp | 35 +++ ...re_u16u32.cpp => lsc_usm_store_u16u32.cpp} | 8 +- ...at_store_u32.cpp => lsc_usm_store_u32.cpp} | 11 +- ...lm_store_u64.cpp => lsc_usm_store_u64.cpp} | 13 +- ...tore_u8u32.cpp => lsc_usm_store_u8u32.cpp} | 6 +- 57 files changed, 1342 insertions(+), 889 deletions(-) create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_block_load.hpp create mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_block_store.hpp delete mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp delete mode 100644 SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp rename SYCL/ESIMD/lsc/Inputs/{lsc_flat_load.hpp => lsc_usm_load.hpp} (77%) rename SYCL/ESIMD/lsc/Inputs/{lsc_flat_store.hpp => lsc_usm_store.hpp} (90%) create mode 100644 SYCL/ESIMD/lsc/lsc_block_load_u16.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_load_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_load_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_load_u8.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_prefetch_u16.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_prefetch_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_prefetch_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_prefetch_u8.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_store_u16.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_store_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_block_store_u8.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_fence_pvc.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp rename SYCL/ESIMD/lsc/{lsc_slm_pvc.cpp => lsc_slm.cpp} (88%) delete mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp delete mode 100644 SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp rename SYCL/ESIMD/lsc/{lsc_surf_pvc.cpp => lsc_surf.cpp} (82%) create mode 100644 SYCL/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_prefetch_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_prefetch_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp rename SYCL/ESIMD/lsc/{lsc_flat_pvc.cpp => lsc_usm.cpp} (81%) rename SYCL/ESIMD/lsc/{lsc_flat_2d_pvc.cpp => lsc_usm_2d.cpp} (85%) rename SYCL/ESIMD/lsc/{lsc_flat_atomic_cachehint_pvc.cpp => lsc_usm_atomic_cachehint.cpp} (88%) rename SYCL/ESIMD/lsc/{lsc_flat_load_u16u32.cpp => lsc_usm_load_u16u32.cpp} (87%) rename SYCL/ESIMD/lsc/{lsc_flat_load_u32.cpp => lsc_usm_load_u32.cpp} (81%) rename SYCL/ESIMD/lsc/{lsc_slm_load_u64.cpp => lsc_usm_load_u64.cpp} (75%) rename SYCL/ESIMD/lsc/{lsc_flat_load_u8u32.cpp => lsc_usm_load_u8u32.cpp} (87%) create mode 100644 SYCL/ESIMD/lsc/lsc_usm_prefetch_u16u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_usm_prefetch_u32.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_usm_prefetch_u64.cpp create mode 100644 SYCL/ESIMD/lsc/lsc_usm_prefetch_u8u32.cpp rename SYCL/ESIMD/lsc/{lsc_slm_store_u16u32.cpp => lsc_usm_store_u16u32.cpp} (86%) rename SYCL/ESIMD/lsc/{lsc_flat_store_u32.cpp => lsc_usm_store_u32.cpp} (81%) rename SYCL/ESIMD/lsc/{lsc_slm_store_u64.cpp => lsc_usm_store_u64.cpp} (79%) rename SYCL/ESIMD/lsc/{lsc_flat_store_u8u32.cpp => lsc_usm_store_u8u32.cpp} (87%) diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_block_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_block_load.hpp new file mode 100644 index 0000000000..f96ae3f35a --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_block_load.hpp @@ -0,0 +1,274 @@ +//==---------------- lsc_block_load.hpp - DPC++ ESIMD on-device test -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; +using namespace sycl::ext::intel::esimd::detail; +using namespace sycl::ext::intel::experimental::esimd; +using namespace sycl::ext::intel::experimental::esimd::detail; + +template +bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, + int X, int Y) { + // Some restrictions based on documentation + static_assert(!(Transposed && Transformed), + "Transposed and transformed is not supported"); + static_assert(BlockWidth > 0, "Block width must be positive"); + static_assert(BlockHeight > 0, "Block height must be positive"); + + if constexpr (Transposed) { + static_assert(NBlocks == 1, "Transposed expected to be 1 block only"); + static_assert(sizeof(T) >= 4, "Transposed can only use D32 and D64"); + if constexpr (sizeof(T) == 4) { + static_assert(BlockWidth <= 8, + "D32 transposed allow only block width 8 and less"); + static_assert(BlockHeight <= 32, + "D32 transposed allow only block height 32 and less"); + } + if constexpr (sizeof(T) == 8) { + static_assert(BlockWidth == 1 || BlockWidth == 2 || BlockWidth == 4, + "D64 transposed allow only block width 1/2/4"); + static_assert(BlockHeight == 8, + "D64 transposed allow only block height 8"); + } + } else if constexpr (Transformed) { + static_assert(sizeof(T) <= 2, "Transformed can only use D8 and D16"); + if constexpr (sizeof(T) == 2 && NBlocks == 4) { + static_assert(BlockWidth <= 8, + "Transformed D16x4 allow only block width 8 and less"); + } + static_assert((sizeof(T) * BlockWidth) % 4 == 0, + "Transformed block width must be aligned by DW"); + static_assert(BlockWidth <= 16, + "Transformed block width must be 16 and less"); + static_assert(BlockWidth >= (4 / sizeof(T)), + "Minimal transformed block width depends on data size"); + static_assert(BlockHeight <= 32, + "Transformed block height must be 32 and less"); + static_assert(BlockHeight >= (4 / sizeof(T)), + "Minimal transformed block height depends on data size"); + } else { + static_assert((sizeof(T) * BlockWidth) % 4 == 0, + "Block width must be aligned by DW"); + static_assert(sizeof(T) * BlockWidth * NBlocks <= 64, + "Total block width must be 64B or less"); + static_assert(BlockHeight <= 32, "Block height must be 32 or less"); + if constexpr (sizeof(T) == 4) { + static_assert(NBlocks < 4, "D32 restricted to use 1 or 2 blocks only"); + } + if constexpr (sizeof(T) == 8) { + static_assert(NBlocks < 2, "D64 restricted to use 1 block only"); + } + } + + constexpr int N = + get_lsc_block_2d_data_size(); + /* Due to store2d a is subject to stricter restrictions: + * NBlocks always 1, no Transposed, no Transformed, max BlockHeight 8. + * Series of 2d stores with height 1 are used to write loaded data to output + * buffer. Also Transformed load2d extends BlockWidth to the next power of 2 + * and rounds up BlockHeight. + */ + constexpr int SH = Transformed + ? roundUpNextMultiple() + : BlockHeight; + constexpr int SW = Transformed ? getNextPowerOf2() : BlockWidth; + constexpr int SN = get_lsc_block_2d_data_size(); + + std::cout << "N = " << N << std::endl; + std::cout << "SN = " << SN << std::endl; + std::cout << "W = " << BlockWidth << " SW = " << SW << std::endl; + std::cout << "H = " << BlockHeight << " SH = " << SH << std::endl; + + T old_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + unsigned SurfaceSize = SurfacePitch * SurfaceHeight * NBlocks; + unsigned Size = SurfaceSize * Groups * Threads; + + T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + out[i] = old_val; + + T *in = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + in[i] = get_rand(); + + try { + auto e = q.submit([&](handler &cgh) { + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + uint16_t globalID = ndi.get_global_id(0); + uint32_t off = globalID * SurfaceSize; + + unsigned width = SurfaceWidth * sizeof(T) - 1; + unsigned height = SurfaceHeight - 1; + unsigned pitch = SurfacePitch * sizeof(T) - 1; + + simd vals; + if constexpr (use_prefetch) { + lsc_prefetch2d( + in + off, width, height, pitch, X, Y); + vals = + lsc_load2d(in + off, width, height, pitch, X, Y); + } else { + vals = lsc_load2d(in + off, width, height, + pitch, X, Y); + } + + for (int i = 0; i < NBlocks; i++) { + for (int j = 0; j < SH; j++) { + simd v = + vals.template select(i * SN * SH + j * SW); + lsc_store2d( + out + off, SurfaceWidth * sizeof(T) - 1, SurfaceHeight - 1, + SurfacePitch * sizeof(T) - 1, X + i * SW, Y + j, v); + } + } + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + sycl::free(out, ctx); + sycl::free(in, ctx); + return false; + } + + bool passed = true; + + if constexpr (Transposed) { + for (int gid = 0; gid < Groups * Threads; gid++) { + int dx = 0, dy = 0; + for (int j = 0; j < SurfaceHeight; j++) { + for (int i = 0; i < SurfacePitch; i++) { + T e = old_val; + // index in linear buffer + int idx = i + j * SurfacePitch + gid * SurfaceSize; + + // check if inside block + if ((i >= X) && (i < X + BlockWidth) && (j >= Y) && + (j < Y + BlockHeight)) { + if (i < SurfaceWidth) { + if (X + dx < SurfaceWidth) + e = in[(X + dx) + (Y + dy) * SurfacePitch + gid * SurfaceSize]; + else + e = (T)0; + } + dy += 1; + if (dy == BlockHeight) { + dy = 0; + dx += 1; + } + } + + if (out[idx] != e) { + passed = false; + std::cout << "out" << idx << "] = 0x" << std::hex + << (uint64_t)out[idx] << " vs etalon = 0x" << (uint64_t)e + << std::dec << std::endl; + } + } + } + } + } else if constexpr (Transformed) { + constexpr int scale = 4 / sizeof(T); + for (int gid = 0; gid < Groups * Threads; gid++) { + for (int j = 0; j < SurfaceHeight; j++) { + for (int i = 0; i < SurfacePitch; i++) { + T e = old_val; + // index in linear buffer + int idx = i + j * SurfacePitch + gid * SurfaceSize; + + // check if inside block + if ((i >= X) && (i < X + SW * NBlocks) && (j >= Y) && (j < Y + SH)) { + int di = i - X; + int dj = j - Y; + int bn = di / SW; + + int dx, dy; + dx = di / scale + bn * (BlockWidth - SW / scale) + + (dj % scale) * SW / scale; + dy = dj + di % scale - dj % scale; + + if (i < SurfaceWidth) { + if (dx < BlockWidth * (bn + 1) && (dx + X) < SurfaceWidth && + (dy + Y) < SurfaceHeight) + e = in[(X + dx) + (Y + dy) * SurfacePitch + gid * SurfaceSize]; + else + e = (T)0; + } + } + + if (out[idx] != e) { + passed = false; + std::cout << std::hex << "out[0x" << idx << "] = 0x" + << (uint64_t)out[idx] << " vs etalon = 0x" << (uint64_t)e + << std::dec << std::endl; + } + } + } + } + } else { + for (int gid = 0; gid < Groups * Threads; gid++) { + for (int j = 0; j < SurfaceHeight; j++) { + for (int i = 0; i < SurfacePitch; i++) { + T e = old_val; + // index in linear buffer + int idx = i + j * SurfacePitch + gid * SurfaceSize; + + // check if inside block + if ((i >= X) && (i < X + BlockWidth * NBlocks) && + (i < SurfaceWidth) && (j >= Y) && (j < Y + BlockHeight)) + e = in[idx]; + + if (out[idx] != e) { + passed = false; + std::cout << "out[" << idx << "] = 0x" << std::hex + << (uint64_t)out[idx] << " vs etalon = 0x" << (uint64_t)e + << std::dec << std::endl; + } + } + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + sycl::free(out, ctx); + sycl::free(in, ctx); + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_block_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_block_store.hpp new file mode 100644 index 0000000000..8d5e327e56 --- /dev/null +++ b/SYCL/ESIMD/lsc/Inputs/lsc_block_store.hpp @@ -0,0 +1,110 @@ +//==---------------- lsc_block_store.hpp - DPC++ ESIMD on-device test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include "common.hpp" + +using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; +using namespace sycl::ext::intel::experimental::esimd; +using namespace sycl::ext::intel::experimental::esimd::detail; + +template (), + cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none> +bool test(unsigned SurfaceWidth, unsigned SurfaceHeight, unsigned SurfacePitch, + int X, int Y) { + static_assert(BlockWidth > 0, "Block width must be positive"); + static_assert(BlockHeight > 0, "Block height must be positive"); + static_assert((sizeof(T) * BlockWidth) % 4 == 0, + "Block width must be aligned by DW"); + static_assert(sizeof(T) * BlockWidth <= 64, + "Block width must be 64B or less"); + static_assert(BlockHeight <= 8, "Block height must be 8 or less"); + + T old_val = get_rand(); + T new_val = get_rand(); + + auto GPUSelector = gpu_selector{}; + auto q = queue{GPUSelector}; + auto dev = q.get_device(); + std::cout << "Running case #" << case_num << " on " + << dev.get_info() << "\n"; + auto ctx = q.get_context(); + + // workgroups + cl::sycl::range<1> GlobalRange{Groups}; + // threads in each group + cl::sycl::range<1> LocalRange{Threads}; + cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; + + unsigned SurfaceSize = SurfacePitch * SurfaceHeight; + unsigned Size = SurfaceSize * Groups * Threads; + + T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); + for (int i = 0; i < Size; i++) + out[i] = old_val; + + try { + auto e = q.submit([&](handler &cgh) { + cgh.parallel_for>( + Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { + uint16_t globalID = ndi.get_global_id(0); + uint32_t off = globalID * SurfaceSize; + + simd vals(new_val + off, 1); + // IUT + lsc_store2d( + out + off, SurfaceWidth * sizeof(T) - 1, SurfaceHeight - 1, + SurfacePitch * sizeof(T) - 1, X, Y, vals); + }); + }); + e.wait(); + } catch (cl::sycl::exception const &e) { + std::cout << "SYCL exception caught: " << e.what() << '\n'; + sycl::free(out, ctx); + return false; + } + + bool passed = true; + for (int gid = 0; gid < Groups * Threads; gid++) { + T val = new_val + gid * SurfaceSize; + + for (int j = 0; j < SurfaceHeight; j++) { + for (int i = 0; i < SurfacePitch; i++) { + T e = old_val; + // check if inside block + if ((i >= X) && (i < X + BlockWidth) && (i < SurfaceWidth) && + (j >= Y) && (j < Y + BlockHeight)) + e = val++; + + // index in linear buffer + int idx = i + j * SurfacePitch + gid * SurfaceSize; + if (out[idx] != e) { + passed = false; + std::cout << "out[" << idx << "] = 0x" << std::hex + << (uint64_t)out[idx] << " vs etalon = 0x" << (uint64_t)e + << std::dec << std::endl; + } + } + } + } + + if (!passed) + std::cout << "Case #" << case_num << " FAILED" << std::endl; + + sycl::free(out, ctx); + + return passed; +} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp deleted file mode 100644 index 881f2a544b..0000000000 --- a/SYCL/ESIMD/lsc/Inputs/lsc_slm_load.hpp +++ /dev/null @@ -1,160 +0,0 @@ -//==------- lsc_slm_load.hpp - DPC++ ESIMD on-device test ------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include - -#include "common.hpp" - -using namespace cl::sycl; -using namespace sycl::ext::intel::experimental::esimd; - -template -bool test(uint32_t pmask = 0xffffffff) { - static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); - if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { - static_assert(!transpose, "Conversion types may not use vector"); - static_assert(VS == 1, "Only D32 and D64 support vector load"); - } - - static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); - static_assert(sizeof(T) >= 4, - "D8 and D16 are valid only in 2D block load/store"); - - uint16_t Size = Groups * Threads * VL * VS; - - T vmask = (T)-1; - if constexpr (DS == lsc_data_size::u8u32) - vmask = (T)0xff; - if constexpr (DS == lsc_data_size::u16u32) - vmask = (T)0xffff; - if constexpr (DS == lsc_data_size::u16u32h) - vmask = (T)0xffff0000; - - T old_val = get_rand(); - T new_val = get_rand(); - - auto GPUSelector = gpu_selector{}; - auto q = queue{GPUSelector}; - auto dev = q.get_device(); - std::cout << "Running case #" << case_num << " on " - << dev.get_info() << "\n"; - auto ctx = q.get_context(); - - // workgroups - cl::sycl::range<1> GlobalRange{Groups}; - // threads in each group - cl::sycl::range<1> LocalRange{Threads}; - cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; - - T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); - for (int i = 0; i < Size; i++) - out[i] = 0; - - std::vector p(VL, 0); - if constexpr (!transpose) - for (int i = 0; i < VL; i++) - p[i] = (pmask >> i) & 1; - - try { - buffer bufp(p.data(), p.size()); - - auto e = q.submit([&](handler &cgh) { - auto accp = bufp.template get_access(cgh); - cgh.parallel_for>( - Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { - constexpr uint16_t gran = 4; // using oword write x1 to init SLM - constexpr uint16_t slm_blocks = // number of owords per Thread - VL * VS / gran + ((VL * VS) % gran ? 1 : 0); - constexpr uint16_t slm_ud_per_group = slm_blocks * Threads * gran; - constexpr uint16_t slm_size_per_group = - slm_ud_per_group * sizeof(T); - constexpr uint16_t slm_size = slm_size_per_group * Groups; - - uint16_t globalID = ndi.get_global_id(0); - uint32_t elem_off = globalID * VL * VS; - uint32_t byte_off = elem_off * sizeof(T); - - slm_init(slm_size); - if (ndi.get_local_id(0) == 0) { - uint32_t groupID = ndi.get_group(0); - uint32_t group_off = groupID * slm_size_per_group; - simd slm_val(new_val + groupID * slm_ud_per_group, 1); - for (int i = 0; i < slm_size_per_group; i += gran * sizeof(T)) { - slm_block_store(i + group_off, slm_val); - slm_val += gran; - } - } - - barrier(); - - if constexpr (transpose) { - auto vals = lsc_slm_block_load(byte_off); - lsc_block_store(out + elem_off, vals); - } else { - simd pred = lsc_block_load(accp, 0); - simd offset(byte_off, VS * sizeof(T)); - - auto loaded = - lsc_slm_gather(offset, pred); - - if constexpr (DS == lsc_data_size::u8u32 || - DS == lsc_data_size::u16u32) - loaded &= vmask; - - simd vals(old_val); - for (int i = 0; i < VS; i++) - vals.template select(i * VL).merge( - loaded.template select(i * VL), pred); - - lsc_scatter(out, offset, vals); - } - }); - }); - e.wait(); - } catch (cl::sycl::exception const &e) { - std::cout << "SYCL exception caught: " << e.what() << '\n'; - sycl::free(out, ctx); - return false; - } - - bool passed = true; - - if constexpr (transpose) { - for (int i = 0; i < Size; i++) { - T e = new_val + i; - if (out[i] != e) { - passed = false; - std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] - << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; - } - } - } else { - for (int i = 0; i < Size; i++) { - T e = (pmask >> ((i / VS) % VL)) & 1 ? (new_val + i) & vmask : old_val; - if (out[i] != e) { - passed = false; - std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] - << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; - } - } - } - - if (!passed) - std::cout << "Case #" << case_num << " FAILED" << std::endl; - - sycl::free(out, ctx); - - return passed; -} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp deleted file mode 100644 index 141d13b40c..0000000000 --- a/SYCL/ESIMD/lsc/Inputs/lsc_slm_store.hpp +++ /dev/null @@ -1,165 +0,0 @@ -//==------- lsc_slm_store.hpp - DPC++ ESIMD on-device test -----------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include - -#include "common.hpp" - -using namespace cl::sycl; -using namespace sycl::ext::intel::experimental::esimd; - -template -bool test(uint32_t pmask = 0xffffffff) { - static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); - if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { - static_assert(!transpose, "Conversion types may not use vector"); - static_assert(VS == 1, "Only D32 and D64 support vector load"); - } - - static_assert(DS != lsc_data_size::u16u32h, "D16U32h not supported in HW"); - static_assert(sizeof(T) >= 4, - "D8 and D16 are valid only in 2D block load/store"); - - uint16_t Size = Groups * Threads * VL * VS; - - T vmask = (T)-1; - if constexpr (DS == lsc_data_size::u8u32) - vmask = (T)0xff; - if constexpr (DS == lsc_data_size::u16u32) - vmask = (T)0xffff; - if constexpr (DS == lsc_data_size::u16u32h) - vmask = (T)0xffff0000; - - T old_val = get_rand(); - T new_val = get_rand(); - - auto GPUSelector = gpu_selector{}; - auto q = queue{GPUSelector}; - auto dev = q.get_device(); - std::cout << "Running case #" << case_num << " on " - << dev.get_info() << "\n"; - auto ctx = q.get_context(); - - // workgroups - cl::sycl::range<1> GlobalRange{Groups}; - // threads in each group - cl::sycl::range<1> LocalRange{Threads}; - cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; - - T *out = static_cast(sycl::malloc_shared(Size * sizeof(T), dev, ctx)); - for (int i = 0; i < Size; i++) - out[i] = 0; - - std::vector p(VL, 0); - if constexpr (!transpose) - for (int i = 0; i < VL; i++) - p[i] = (pmask >> i) & 1; - - try { - buffer bufp(p.data(), p.size()); - - auto e = q.submit([&](handler &cgh) { - auto accp = bufp.template get_access(cgh); - cgh.parallel_for>( - Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { - constexpr uint16_t gran = 4; // using oword write x1 to init SLM - constexpr uint16_t slm_blocks = // number of owords per Thread - VL * VS / gran + ((VL * VS) % gran ? 1 : 0); - constexpr uint16_t slm_ud_per_group = slm_blocks * Threads * gran; - constexpr uint16_t slm_size_per_group = - slm_ud_per_group * sizeof(T); - constexpr uint16_t slm_size = slm_size_per_group * Groups; - - uint16_t globalID = ndi.get_global_id(0); - uint32_t elem_off = globalID * VL * VS; - uint32_t byte_off = elem_off * sizeof(T); - - slm_init(slm_size); - if (ndi.get_local_id(0) == 0) { - uint32_t group_off = ndi.get_group(0) * slm_size_per_group; - simd slm_val(old_val); - for (int i = 0; i < slm_size_per_group; i += gran * sizeof(T)) - slm_block_store(i + group_off, slm_val); - } - - barrier(); - - if constexpr (transpose) { - simd vals(new_val + elem_off, 1); - lsc_slm_block_store(byte_off, vals); - - barrier(); - - auto ret = lsc_slm_block_load(byte_off); - lsc_block_store(out + elem_off, ret); - } else { - T val = new_val + elem_off; - simd vals; - for (int i = 0; i < VL; i++) - for (int j = 0; j < VS; j++) - vals.template select<1, 1>(i + j * VL) = val++; - - simd pred = lsc_block_load(accp, 0); - simd offset(byte_off, VS * sizeof(T)); - - lsc_slm_scatter(offset, vals, pred); - - barrier(); - - auto ret = lsc_slm_gather( - offset); - lsc_scatter(out, offset, ret); - } - }); - }); - e.wait(); - } catch (cl::sycl::exception const &e) { - std::cout << "SYCL exception caught: " << e.what() << '\n'; - sycl::free(out, ctx); - return false; - } - - bool passed = true; - - if constexpr (transpose) { - for (int i = 0; i < Size; i++) { - T e = new_val + i; - if (out[i] != e) { - passed = false; - std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] - << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; - } - } - } else { - for (int i = 0; i < Size; i++) { - T e = (pmask >> ((i / VS) % VL)) & 1 - ? ((new_val + i) & vmask) | (old_val & ~vmask) - : old_val; - if (out[i] != e) { - passed = false; - std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] - << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; - } - } - } - - if (!passed) - std::cout << "Case #" << case_num << " FAILED" << std::endl; - - sycl::free(out, ctx); - - return passed; -} diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp index df61743adc..0e2eb55b86 100644 --- a/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp +++ b/SYCL/ESIMD/lsc/Inputs/lsc_surf_load.hpp @@ -7,19 +7,21 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include "common.hpp" using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; template + cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, + bool use_prefetch = false> bool test(uint32_t pmask = 0xffffffff) { static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { @@ -31,6 +33,12 @@ bool test(uint32_t pmask = 0xffffffff) { static_assert(sizeof(T) >= 4, "D8 and D16 are valid only in 2D block load/store"); + if constexpr (!transpose && VS > 1) { + static_assert(VL == 16 || VL == 32, + "IGC prohibits execution size less than SIMD size when " + "vector size is greater than 1"); + } + uint16_t Size = Groups * Threads * VL * VS; T vmask = (T)-1; @@ -61,20 +69,13 @@ bool test(uint32_t pmask = 0xffffffff) { for (int i = 0; i < Size; i++) in[i] = get_rand(); - std::vector p(VL, 0); - if constexpr (!transpose) - for (int i = 0; i < VL; i++) - p[i] = (pmask >> i) & 1; - try { buffer bufo(out.data(), out.size()); buffer bufi(in.data(), in.size()); - buffer bufp(p.data(), p.size()); auto e = q.submit([&](handler &cgh) { auto acco = bufo.template get_access(cgh); auto acci = bufi.template get_access(cgh); - auto accp = bufp.template get_access(cgh); cgh.parallel_for>( Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { uint16_t globalID = ndi.get_global_id(0); @@ -82,28 +83,36 @@ bool test(uint32_t pmask = 0xffffffff) { uint32_t byte_off = elem_off * sizeof(T); if constexpr (transpose) { - auto vals = lsc_block_load(acci, byte_off); - lsc_block_store( + simd vals; + if constexpr (use_prefetch) { + lsc_prefetch(acci, byte_off); + vals = lsc_block_load(acci, byte_off); + } else { + vals = lsc_block_load(acci, byte_off); + } + lsc_block_store( acco, byte_off, vals); } else { simd offset(byte_off, VS * sizeof(T)); - simd pred = lsc_block_load(accp, 0); + simd_mask pred; + for (int i = 0; i < VL; i++) + pred.template select<1, 1>(i) = (pmask >> i) & 1; + + simd vals; + if constexpr (use_prefetch) { + lsc_prefetch(acci, offset, pred); + vals = lsc_gather(acci, offset, pred); + } else { + vals = lsc_gather(acci, offset, pred); + } - auto loaded = - lsc_gather(acci, offset, pred); if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) - loaded &= vmask; - - simd vals(old_val); - simd mask; - for (int i = 0; i < VS; i++) - mask.template select(i * VL) = pred; - vals.merge(loaded, mask); + vals &= vmask; lsc_scatter(acco, offset, vals); + cache_hint::none, VL>(acco, offset, vals, pred); } }); }); @@ -117,11 +126,11 @@ bool test(uint32_t pmask = 0xffffffff) { if constexpr (transpose) { for (int i = 0; i < Size; i++) { - if (out[i] != in[i]) { + T e = in[i]; + if (out[i] != e) { passed = false; std::cout << "out[" << i << "] = 0x" << std::hex << (uint64_t)out[i] - << " vs etalon = 0x" << (uint64_t)in[i] << std::dec - << std::endl; + << " vs etalon = 0x" << (uint64_t)e << std::dec << std::endl; } } } else { diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp index 8c8284111d..25066a8bd0 100644 --- a/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp +++ b/SYCL/ESIMD/lsc/Inputs/lsc_surf_store.hpp @@ -7,13 +7,14 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include "common.hpp" using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; template = 4, "D8 and D16 are valid only in 2D block load/store"); + if constexpr (!transpose && VS > 1) { + static_assert(VL == 16 || VL == 32, + "IGC prohibits execution size less than SIMD size when " + "vector size is greater than 1"); + } + uint16_t Size = Groups * Threads * VL * VS; T vmask = (T)-1; @@ -58,18 +65,12 @@ bool test(uint32_t pmask = 0xffffffff) { cl::sycl::nd_range<1> Range{GlobalRange * LocalRange, LocalRange}; std::vector out(Size, old_val); - std::vector p(VL, 0); - if constexpr (!transpose) - for (int i = 0; i < VL; i++) - p[i] = (pmask >> i) & 1; try { buffer bufo(out.data(), out.size()); - buffer bufp(p.data(), p.size()); auto e = q.submit([&](handler &cgh) { auto acco = bufo.template get_access(cgh); - auto accp = bufp.template get_access(cgh); cgh.parallel_for>( Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { uint16_t globalID = ndi.get_global_id(0); @@ -81,7 +82,9 @@ bool test(uint32_t pmask = 0xffffffff) { lsc_block_store(acco, byte_off, vals); } else { simd offset(byte_off, VS * sizeof(T)); - simd pred = lsc_block_load(accp, 0); + simd_mask pred; + for (int i = 0; i < VL; i++) + pred.template select<1, 1>(i) = (pmask >> i) & 1; T val = new_val + elem_off; simd vals; diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_usm_load.hpp similarity index 77% rename from SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp rename to SYCL/ESIMD/lsc/Inputs/lsc_usm_load.hpp index 0e7b5214f9..23f03a039c 100644 --- a/SYCL/ESIMD/lsc/Inputs/lsc_flat_load.hpp +++ b/SYCL/ESIMD/lsc/Inputs/lsc_usm_load.hpp @@ -7,19 +7,21 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include "common.hpp" using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; template + cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, + bool use_prefetch = false> bool test(uint32_t pmask = 0xffffffff) { static_assert((VL == 1) || !transpose, "Transpose must have exec size 1"); if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) { @@ -31,6 +33,12 @@ bool test(uint32_t pmask = 0xffffffff) { static_assert(sizeof(T) >= 4, "D8 and D16 are valid only in 2D block load/store"); + if constexpr (!transpose && VS > 1) { + static_assert(VL == 16 || VL == 32, + "IGC prohibits execution size less than SIMD size when " + "vector size is greater than 1"); + } + uint16_t Size = Groups * Threads * VL * VS; T vmask = (T)-1; @@ -65,16 +73,8 @@ bool test(uint32_t pmask = 0xffffffff) { for (int i = 0; i < Size; i++) in[i] = get_rand(); - std::vector p(VL, 0); - if constexpr (!transpose) - for (int i = 0; i < VL; i++) - p[i] = (pmask >> i) & 1; - try { - buffer bufp(p.data(), p.size()); - auto e = q.submit([&](handler &cgh) { - auto accp = bufp.template get_access(cgh); cgh.parallel_for>( Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { uint16_t globalID = ndi.get_global_id(0); @@ -82,28 +82,36 @@ bool test(uint32_t pmask = 0xffffffff) { uint32_t byte_off = elem_off * sizeof(T); if constexpr (transpose) { - auto vals = lsc_block_load(in + elem_off); - lsc_block_store( + simd vals; + if constexpr (use_prefetch) { + lsc_prefetch(in + elem_off); + vals = lsc_block_load(in + elem_off); + } else { + vals = lsc_block_load(in + elem_off); + } + lsc_block_store( out + elem_off, vals); } else { simd offset(byte_off, VS * sizeof(T)); - simd pred = lsc_block_load(accp, 0); + simd_mask pred; + for (int i = 0; i < VL; i++) + pred.template select<1, 1>(i) = (pmask >> i) & 1; + + simd vals; + if constexpr (use_prefetch) { + lsc_prefetch(in, offset, pred); + vals = lsc_gather(in, offset, pred); + } else { + vals = lsc_gather(in, offset, pred); + } - auto loaded = - lsc_gather(in, offset, pred); if constexpr (DS == lsc_data_size::u8u32 || DS == lsc_data_size::u16u32) - loaded &= vmask; - - simd vals(old_val); - simd mask(0); - for (int i = 0; i < VS; i++) - mask.template select(i * VL) = pred; - vals.merge(loaded, mask); + vals &= vmask; lsc_scatter(out, offset, vals); + cache_hint::none, VL>(out, offset, vals, pred); } }); }); diff --git a/SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp b/SYCL/ESIMD/lsc/Inputs/lsc_usm_store.hpp similarity index 90% rename from SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp rename to SYCL/ESIMD/lsc/Inputs/lsc_usm_store.hpp index 317322a15f..b29cd9c85a 100644 --- a/SYCL/ESIMD/lsc/Inputs/lsc_flat_store.hpp +++ b/SYCL/ESIMD/lsc/Inputs/lsc_usm_store.hpp @@ -7,13 +7,14 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include "common.hpp" using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; template = 4, "D8 and D16 are valid only in 2D block load/store"); + if constexpr (!transpose && VS > 1) { + static_assert(VL == 16 || VL == 32, + "IGC prohibits execution size less than SIMD size when " + "vector size is greater than 1"); + } + uint16_t Size = Groups * Threads * VL * VS; T vmask = (T)-1; @@ -61,16 +68,8 @@ bool test(uint32_t pmask = 0xffffffff) { for (int i = 0; i < Size; i++) out[i] = old_val; - std::vector p(VL, 0); - if constexpr (!transpose) - for (int i = 0; i < VL; i++) - p[i] = (pmask >> i) & 1; - try { - buffer bufp(p.data(), p.size()); - auto e = q.submit([&](handler &cgh) { - auto accp = bufp.template get_access(cgh); cgh.parallel_for>( Range, [=](cl::sycl::nd_item<1> ndi) SYCL_ESIMD_KERNEL { uint16_t globalID = ndi.get_global_id(0); @@ -82,7 +81,9 @@ bool test(uint32_t pmask = 0xffffffff) { lsc_block_store(out + elem_off, vals); } else { simd offset(byte_off, VS * sizeof(T)); - simd pred = lsc_block_load(accp, 0); + simd_mask pred; + for (int i = 0; i < VL; i++) + pred.template select<1, 1>(i) = (pmask >> i) & 1; T val = new_val + elem_off; simd vals; diff --git a/SYCL/ESIMD/lsc/lsc_block_load_u16.cpp b/SYCL/ESIMD/lsc/lsc_block_load_u16.cpp new file mode 100644 index 0000000000..184098b42a --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_load_u16.cpp @@ -0,0 +1,43 @@ +//==---------------- lsc_block_load_u16.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint16_t; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 16, 32>(24, 64, 64, 6, 21); + passed &= test<2, T, 2, 2, 8, 4>(16, 16, 32, 2, 5); + passed &= test<3, T, 1, 1, 8, 4, 2>(16, 7, 32, 4, 1); + + // transformed + passed &= test<4, T, 1, 1, 6, 10, 1, false, true>(18, 10, 32, 12, 0); + passed &= test<5, T, 1, 1, 8, 4, 4, false, true>(16, 10, 32, 6, 5); + passed &= test<6, T, 1, 1, 16, 2, 2, false, true>(32, 4, 32, 4, 1); + passed &= test<7, T, 1, 1, 2, 16, 2, false, true>(4, 20, 32, 0, 3); + passed &= test<8, T, 1, 1, 16, 32, 1, false, true>(24, 50, 32, 4, 14); + passed &= test<9, T, 1, 1, 6, 4, 4, false, true>(32, 10, 32, 4, 0); + passed &= test<10, T, 1, 1, 6, 4, 2, false, true>(16, 10, 32, 4, 0); + passed &= test<11, T, 1, 1, 4, 8, 2, false, true>(16, 10, 32, 4, 0); + passed &= test<12, T, 1, 1, 2, 16, 4, false, true>(16, 10, 32, 4, 0); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_block_load_u32.cpp new file mode 100644 index 0000000000..73fdbd6a3b --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_load_u32.cpp @@ -0,0 +1,39 @@ +//==---------------- lsc_block_load_u32.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint32_t; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 16, 4>(16, 16, 32, 2, 1); + passed &= test<2, T, 2, 2, 8, 4>(16, 16, 16, 1, 5); + passed &= test<3, T, 1, 1, 8, 2, 2>(16, 4, 16, 3, 1); + + // transposed + passed &= test<4, T, 2, 2, 1, 16, 1, true>(16, 20, 16, 1, 2); + passed &= test<5, T, 1, 1, 2, 8, 1, true>(12, 10, 16, 10, 1); + passed &= test<6, T, 1, 1, 4, 8, 1, true>(16, 10, 24, 11, 1); + passed &= test<7, T, 1, 1, 3, 8, 1, true>(16, 10, 20, 11, 1); + passed &= test<8, T, 1, 1, 8, 2, 1, true>(16, 6, 32, 10, 3); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_block_load_u64.cpp new file mode 100644 index 0000000000..f789a5103a --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_load_u64.cpp @@ -0,0 +1,38 @@ +//==---------------- lsc_block_load_u64.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint64_t; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 8, 32>(8, 32, 8, 0, 0); + passed &= test<2, T, 2, 2, 8, 4>(16, 16, 16, 1, 5); + passed &= test<3, T, 1, 1, 4, 2>(16, 4, 16, 3, 1); + + // transposed + passed &= test<4, T, 1, 1, 1, 8, 1, true>(16, 10, 16, 1, 2); + passed &= test<5, T, 1, 1, 2, 8, 1, true>(16, 10, 16, 10, 1); + passed &= test<6, T, 1, 1, 4, 8, 1, true>(16, 10, 16, 11, 1); + passed &= test<7, T, 2, 2, 4, 8, 1, true>(16, 9, 16, 1, 1); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_load_u8.cpp b/SYCL/ESIMD/lsc/lsc_block_load_u8.cpp new file mode 100644 index 0000000000..9ec2342fcb --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_load_u8.cpp @@ -0,0 +1,40 @@ +//==---------------- lsc_block_load_u8.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint8_t; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 16, 32, 2>(40, 64, 64, 4, 21); + passed &= test<2, T, 2, 2, 8, 8, 2>(16, 16, 64, 8, 5); + passed &= test<3, T, 1, 1, 8, 32, 2>(16, 80, 64, 4, 1); + + // transformed + passed &= test<4, T, 1, 1, 16, 4, 4, false, true>(100, 10, 128, 16, 5); + passed &= test<5, T, 1, 1, 12, 20, 1, false, true>(16, 40, 64, 0, 0); + passed &= test<6, T, 1, 1, 16, 4, 2, false, true>(32, 4, 64, 4, 1); + passed &= test<7, T, 2, 2, 4, 16, 2, false, true>(4, 20, 64, 0, 3); + passed &= test<8, T, 1, 1, 16, 32, 1, false, true>(24, 80, 64, 4, 14); + passed &= test<9, T, 1, 1, 16, 4, 4, false, true>(64, 10, 64, 0, 0); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_prefetch_u16.cpp b/SYCL/ESIMD/lsc/lsc_block_prefetch_u16.cpp new file mode 100644 index 0000000000..2399a8ddd0 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_prefetch_u16.cpp @@ -0,0 +1,50 @@ +//==------------ lsc_block_prefetch_u16.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint16_t; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 16, 32, 1, false, false, L1H, L3H, true>( + 24, 64, 64, 6, 21); + passed &= + test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L3H, true>(16, 16, 32, 2, 5); + passed &= + test<3, T, 1, 1, 8, 4, 2, false, false, L1H, L3H, true>(16, 7, 32, 4, 1); + + // transformed + passed &= + test<4, T, 1, 1, 8, 4, 4, false, true, L1H, L3H, true>(16, 10, 32, 6, 5); + passed &= test<5, T, 1, 1, 6, 10, 1, false, true, L1H, L3H, true>(18, 10, 32, + 12, 0); + passed &= + test<6, T, 1, 1, 16, 2, 2, false, true, L1H, L3H, true>(32, 4, 32, 4, 1); + passed &= + test<7, T, 2, 2, 2, 16, 2, false, true, L1H, L3H, true>(4, 20, 32, 0, 3); + passed &= test<8, T, 1, 1, 16, 32, 1, false, true, L1H, L3H, true>(24, 50, 32, + 4, 14); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_prefetch_u32.cpp b/SYCL/ESIMD/lsc/lsc_block_prefetch_u32.cpp new file mode 100644 index 0000000000..c5f3ae7f76 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_prefetch_u32.cpp @@ -0,0 +1,48 @@ +//==------------ lsc_block_prefetch_u32.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint32_t; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 16, 4, 1, false, false, L1H, L3H, true>(16, 16, 32, + 2, 1); + passed &= + test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L3H, true>(16, 16, 16, 1, 5); + passed &= + test<3, T, 1, 1, 8, 2, 2, false, false, L1H, L3H, true>(16, 4, 16, 3, 1); + + // transposed + passed &= + test<4, T, 1, 1, 1, 16, 1, true, false, L1H, L3H, true>(16, 20, 16, 1, 2); + passed &= + test<5, T, 1, 1, 2, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 10, 1); + passed &= + test<6, T, 1, 1, 4, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 11, 1); + passed &= + test<7, T, 2, 2, 8, 2, 1, true, false, L1H, L3H, true>(16, 4, 16, 1, 1); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_prefetch_u64.cpp b/SYCL/ESIMD/lsc/lsc_block_prefetch_u64.cpp new file mode 100644 index 0000000000..6961a7fac2 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_prefetch_u64.cpp @@ -0,0 +1,48 @@ +//==------------ lsc_block_prefetch_u64.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint64_t; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= + test<1, T, 1, 1, 8, 32, 1, false, false, L1H, L3H, true>(8, 32, 8, 0, 0); + passed &= + test<2, T, 2, 2, 8, 4, 1, false, false, L1H, L3H, true>(16, 16, 16, 1, 5); + passed &= + test<3, T, 1, 1, 4, 2, 1, false, false, L1H, L3H, true>(16, 4, 16, 3, 1); + + // transposed + passed &= + test<4, T, 1, 1, 1, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 1, 2); + passed &= + test<5, T, 1, 1, 2, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 10, 1); + passed &= + test<6, T, 1, 1, 4, 8, 1, true, false, L1H, L3H, true>(16, 10, 16, 11, 1); + passed &= + test<7, T, 2, 2, 4, 8, 1, true, false, L1H, L3H, true>(16, 9, 16, 1, 1); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_prefetch_u8.cpp b/SYCL/ESIMD/lsc/lsc_block_prefetch_u8.cpp new file mode 100644 index 0000000000..81387fba29 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_prefetch_u8.cpp @@ -0,0 +1,50 @@ +//==------------ lsc_block_prefetch_u8.cpp - DPC++ ESIMD on-device test ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_load.hpp" + +constexpr uint32_t seed = 322; +using T = uint8_t; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // These parameters require unpadding. It is not implemented yet + // passed &= test<0, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + // non transposed, non transformed + passed &= test<1, T, 1, 1, 16, 32, 2, false, false, L1H, L3H, true>( + 40, 64, 64, 4, 21); + passed &= + test<2, T, 2, 2, 8, 8, 2, false, false, L1H, L3H, true>(16, 16, 64, 8, 5); + passed &= test<3, T, 1, 1, 8, 32, 2, false, false, L1H, L3H, true>(16, 80, 64, + 4, 1); + + // transformed + passed &= test<4, T, 1, 1, 16, 4, 4, false, true, L1H, L3H, true>(100, 10, + 128, 16, 5); + passed &= test<5, T, 1, 1, 12, 20, 1, false, true, L1H, L3H, true>(16, 40, 64, + 0, 0); + passed &= + test<6, T, 1, 1, 16, 4, 2, false, true, L1H, L3H, true>(32, 4, 64, 4, 1); + passed &= + test<7, T, 2, 2, 4, 16, 2, false, true, L1H, L3H, true>(4, 20, 64, 0, 3); + passed &= test<8, T, 1, 1, 16, 32, 1, false, true, L1H, L3H, true>(24, 80, 64, + 4, 14); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_store_u16.cpp b/SYCL/ESIMD/lsc/lsc_block_store_u16.cpp new file mode 100644 index 0000000000..a35ccfb53c --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_store_u16.cpp @@ -0,0 +1,27 @@ +//==------------ lsc_block_store_u16.cpp - DPC++ ESIMD on-device test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_store.hpp" + +constexpr uint32_t seed = 295; +using T = uint16_t; + +int main(void) { + srand(seed); + bool passed = true; + + passed &= test<1, T, 1, 1, 32, 8>(40, 20, 64, 8, 11); + passed &= test<2, T, 2, 2, 2, 2>(16, 4, 32, 2, 1); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_block_store_u32.cpp new file mode 100644 index 0000000000..5597ecac45 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_store_u32.cpp @@ -0,0 +1,27 @@ +//==------------ lsc_block_store_u32.cpp - DPC++ ESIMD on-device test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_store.hpp" + +constexpr uint32_t seed = 633; +using T = uint32_t; + +int main(void) { + srand(seed); + bool passed = true; + + passed &= test<1, T, 1, 1, 16, 8>(32, 20, 64, 5, 11); + passed &= test<2, T, 2, 2, 2, 2>(16, 4, 16, 1, 1); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_block_store_u8.cpp b/SYCL/ESIMD/lsc/lsc_block_store_u8.cpp new file mode 100644 index 0000000000..d60a751baa --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_block_store_u8.cpp @@ -0,0 +1,27 @@ +//==------------ lsc_block_store_u8.cpp - DPC++ ESIMD on-device test ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_block_store.hpp" + +constexpr uint32_t seed = 336; +using T = uint8_t; + +int main(void) { + srand(seed); + bool passed = true; + + passed &= test<1, T, 1, 1, 64, 8>(80, 20, 96, 16, 11); + passed &= test<2, T, 2, 2, 4, 2>(16, 4, 64, 4, 1); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_fence_pvc.cpp b/SYCL/ESIMD/lsc/lsc_fence_pvc.cpp deleted file mode 100644 index a927b5d212..0000000000 --- a/SYCL/ESIMD/lsc/lsc_fence_pvc.cpp +++ /dev/null @@ -1,87 +0,0 @@ -//==------- lsc_fence_pvc.cpp - DPC++ ESIMD on-device test -----------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This test checks lsc_fence intrinsic. - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -#include "../esimd_test_utils.hpp" - -#include -#include -#include -#include -#include - -int main() { - using namespace cl::sycl; - using namespace sycl::ext::intel::experimental::esimd; - auto size = size_t{512}; - unsigned constexpr SIMDSize = 8; - - queue q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler()); - auto device = q.get_device(); - std::cout << "Device name: " << device.get_info() - << std::endl; - - auto *res_vec = malloc_shared(size, q); - std::fill(res_vec, res_vec + size, 0); - - try { - q.submit([&](handler &h) { - h.parallel_for( - range<2>{size / SIMDSize, 2}, [=](id<2> id) SYCL_ESIMD_KERNEL { - // Basically this kernel is an example from wiki: - // https://en.wikipedia.org/wiki/Memory_barrier#Example - slm_init(8192); - auto offset = id[0] * SIMDSize; - auto byte_offset = offset * sizeof(int); - auto cond_offset = size * sizeof(int) + byte_offset; - if (id[1] % 2 == 0) { - // First thread: write data and condition - // and provoke gpu to reorder instructions - auto data = simd(offset, 1); - lsc_slm_block_store(byte_offset, data * 10); - lsc_slm_block_store(byte_offset, data * 5); - lsc_slm_block_store(byte_offset, data); - // Protect from reordering for the last two instructions - lsc_fence(); - lsc_slm_block_store(cond_offset, - simd(1)); - } else { - auto condition = simd(0); - int imax = 1000; - int i = 0; - while (condition[0] == 0 && i < imax) { - condition = lsc_slm_block_load(cond_offset); - ++i; - } - // Protect from reordering for the while cycle and data read - lsc_fence(); - auto data = lsc_slm_block_load(byte_offset); - lsc_block_store(res_vec + offset, data); - } - }); - }); - q.wait(); - } catch (sycl::exception e) { - std::cout << "SYCL exception caught: " << e.what(); - free(res_vec, q); - return 1; - } - - auto error = 0; - for (auto i = 0; i != size; ++i) { - error += std::abs(res_vec[i] - i); - } - std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; - free(res_vec, q); - return error; -} diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp deleted file mode 100644 index 22b5cd700e..0000000000 --- a/SYCL/ESIMD/lsc/lsc_flat_load_u64.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//==------- lsc_flat_load_u64.cpp - DPC++ ESIMD on-device test -------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_flat_load.hpp" - -constexpr uint32_t seed = 187; - -int main(void) { - srand(seed); - bool passed = true; - - // non transpose - passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); - passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); - passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); - passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); - passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail - - // transpose - passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); - passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); - passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp deleted file mode 100644 index d64e8f219e..0000000000 --- a/SYCL/ESIMD/lsc/lsc_flat_store_u16u32.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//==------- lsc_flat_store_u16u32.cpp - DPC++ ESIMD on-device test ---------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_flat_store.hpp" - -constexpr uint32_t seed = 286; -constexpr lsc_data_size DS = lsc_data_size::u16u32; - -int main(void) { - srand(seed); - bool passed = true; - - // non-transpose - passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); - passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); - passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); - passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); - passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp b/SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp deleted file mode 100644 index 2c11cc5a77..0000000000 --- a/SYCL/ESIMD/lsc/lsc_flat_store_u64.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//==------- lsc_flat_store_u64.cpp - DPC++ ESIMD on-device test ------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_flat_store.hpp" - -constexpr uint32_t seed = 287; - -int main(void) { - srand(seed); - bool passed = true; - - // non transpose - passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); - passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); - passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); - passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); - passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail - - // transpose - passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); - passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); - passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_slm_pvc.cpp b/SYCL/ESIMD/lsc/lsc_slm.cpp similarity index 88% rename from SYCL/ESIMD/lsc/lsc_slm_pvc.cpp rename to SYCL/ESIMD/lsc/lsc_slm.cpp index a3ed61ba58..c2d96a9528 100644 --- a/SYCL/ESIMD/lsc/lsc_slm_pvc.cpp +++ b/SYCL/ESIMD/lsc/lsc_slm.cpp @@ -1,14 +1,12 @@ -//==------- lsc_slm_pvc.cpp - DPC++ ESIMD on-device test -------------------==// +//==------------ lsc_slm.cpp - DPC++ ESIMD on-device test ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -// This test checks 1d slm lsc intrinsics - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -18,10 +16,11 @@ #include #include #include -#include +#include int main() { using namespace cl::sycl; + using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; auto size = size_t{128}; auto constexpr SIMDSize = unsigned{4}; @@ -57,8 +56,8 @@ int main() { sizeof(int)); auto data = simd(id * SIMDSize, 1); auto pred = simd_mask(1); - auto add = simd(5); - auto compare = simd(id * SIMDSize, 1); + auto add = simd(5); + auto compare = simd(id * SIMDSize, 1); auto swap = compare * 2; slm_init(4096); @@ -71,17 +70,17 @@ int main() { lsc_block_store(access_1, offset, data_1); lsc_slm_block_store(offset, data); - lsc_slm_atomic_update(offsets, pred); + lsc_slm_atomic_update(offsets, pred); auto data_2 = lsc_slm_block_load(offset); lsc_block_store(access_2, offset, data_2); lsc_slm_block_store(offset, data); - lsc_slm_atomic_update(offsets, add, pred); + lsc_slm_atomic_update(offsets, add, pred); auto data_3 = lsc_slm_block_load(offset); lsc_block_store(access_3, offset, data_3); lsc_slm_block_store(offset, data); - lsc_slm_atomic_update(offsets, compare, + lsc_slm_atomic_update(offsets, compare, swap, pred); auto data_4 = lsc_slm_block_load(offset); lsc_block_store(access_4, offset, data_4); @@ -106,6 +105,6 @@ int main() { error += std::abs(vec_3[i] - (i + 5)); error += std::abs(vec_4[i] - (i * 2)); } - std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; return error; } diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp deleted file mode 100644 index f7133f3055..0000000000 --- a/SYCL/ESIMD/lsc/lsc_slm_load_u16u32.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//==------- lsc_slm_load_u16u32.cpp - DPC++ ESIMD on-device test -----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_slm_load.hpp" - -constexpr uint32_t seed = 175; -constexpr lsc_data_size DS = lsc_data_size::u16u32; - -int main(void) { - srand(seed); - bool passed = true; - - // non-transpose - passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); - passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); - passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); - passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); - passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp deleted file mode 100644 index d923b41ab7..0000000000 --- a/SYCL/ESIMD/lsc/lsc_slm_load_u32.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//==------- lsc_slm_load_u32.cpp - DPC++ ESIMD on-device test --------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_slm_load.hpp" - -constexpr uint32_t seed = 177; - -int main(void) { - srand(seed); - bool passed = true; - - // non transpose - passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); - passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); - passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); - passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); - passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail - - // transpose - passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); - passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); - passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp deleted file mode 100644 index 33875a4d02..0000000000 --- a/SYCL/ESIMD/lsc/lsc_slm_load_u8u32.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//==------- lsc_slm_load_u8u32.cpp - DPC++ ESIMD on-device test ------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_slm_load.hpp" - -constexpr uint32_t seed = 174; -constexpr lsc_data_size DS = lsc_data_size::u8u32; - -int main(void) { - srand(seed); - bool passed = true; - - // non-transpose - passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); - passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); - passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); - passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); - passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp deleted file mode 100644 index 952e96b1b6..0000000000 --- a/SYCL/ESIMD/lsc/lsc_slm_store_u32.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//==------- lsc_slm_store_u32.cpp - DPC++ ESIMD on-device test -------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_slm_store.hpp" - -constexpr uint32_t seed = 277; - -int main(void) { - srand(seed); - bool passed = true; - - // non transpose - passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); - passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); - passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); - passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); - passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail - - // transpose - passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); - passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); - passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp deleted file mode 100644 index 8cedb81e5b..0000000000 --- a/SYCL/ESIMD/lsc/lsc_slm_store_u8u32.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//==------- lsc_slm_store_u8u32.cpp - DPC++ ESIMD on-device test -----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -o %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -#include "Inputs/lsc_slm_store.hpp" - -constexpr uint32_t seed = 274; -constexpr lsc_data_size DS = lsc_data_size::u8u32; - -int main(void) { - srand(seed); - bool passed = true; - - // non-transpose - passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS>(rand()); - passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS>(rand()); - passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS>(rand()); - passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS>(rand()); - passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS>(rand()); - - std::cout << (passed ? "Passed\n" : "FAILED\n"); - return passed ? 0 : 1; -} diff --git a/SYCL/ESIMD/lsc/lsc_surf_pvc.cpp b/SYCL/ESIMD/lsc/lsc_surf.cpp similarity index 82% rename from SYCL/ESIMD/lsc/lsc_surf_pvc.cpp rename to SYCL/ESIMD/lsc/lsc_surf.cpp index 6050d2f82e..39d721ca34 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_pvc.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf.cpp @@ -1,15 +1,13 @@ -//==------- lsc_surf_pvc.cpp - DPC++ ESIMD on-device test ------------------==// +//==------------ lsc_surf.cpp - DPC++ ESIMD on-device test -----------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -// This test checks 1d surf lsc intrinsics - // REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -DESIMD_GEN12_7 -o %t.out +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out #include "../esimd_test_utils.hpp" @@ -18,10 +16,11 @@ #include #include #include -#include +#include int main() { using namespace cl::sycl; + using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; auto size = size_t{128}; auto constexpr SIMDSize = unsigned{4}; @@ -61,26 +60,26 @@ int main() { auto offsets = simd(id * SIMDSize * sizeof(int), sizeof(int)); auto pred = simd_mask(1); - auto add = simd(5); - auto compare = simd(id * SIMDSize, 1); + auto add = simd(5); + auto compare = simd(id * SIMDSize, 1); auto swap = compare * 2; lsc_prefetch(access_0, - offset); + cache_hint::cached, cache_hint::uncached>(access_0, + offset); auto data_0 = lsc_block_load(access_0, offset); lsc_block_store(access_0, offset, data_0 * 2); lsc_prefetch(access_1, - offsets); + cache_hint::cached, cache_hint::uncached>(access_1, + offsets); auto data_1 = lsc_gather(access_1, offsets); lsc_scatter(access_1, offsets, data_1 * 2); - lsc_atomic_update(access_2, offsets, pred); - lsc_atomic_update(access_3, offsets, add, + lsc_atomic_update(access_2, offsets, pred); + lsc_atomic_update(access_3, offsets, add, pred); - lsc_atomic_update(access_4, offsets, + lsc_atomic_update(access_4, offsets, compare, swap, pred); }); }); @@ -103,6 +102,6 @@ int main() { error += std::abs(vec_3[i] - (i + 5)); error += std::abs(vec_4[i] - (i * 2)); } - std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; return error; } diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp index 0107ccb62f..0083d070d4 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u16u32.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp index 8cecc3da72..46922f96dc 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u32.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -25,8 +25,9 @@ int main(void) { passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // passed &= test<6, uint32_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint32_t, 1, 4, 8, 3, false>(rand()); // transpose passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp index c226986c7c..b1fb41db6a 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u64.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -19,19 +19,20 @@ int main(void) { bool passed = true; // non transpose - passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); - passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); - passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); - passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); - passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + passed &= test<0, uint64_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint64_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint64_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint64_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint64_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint64_t, 2, 1, 1, 1, false>(1); + + // passed &= test<6, uint64_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint64_t, 1, 4, 8, 3, false>(rand()); // transpose - passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); - passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); - passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + passed &= test<8, uint64_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint64_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint64_t, 4, 4, 1, 4, true>(); std::cout << (passed ? "Passed\n" : "FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp index 3800c56c17..36812d57f3 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_load_u8u32.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp new file mode 100644 index 0000000000..c36da7efa9 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u16u32.cpp @@ -0,0 +1,35 @@ +//==------------ lsc_surf_prefetch_u16u32.cpp - DPC++ ESIMD on-device test -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 197; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L3H, true>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_prefetch_u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u32.cpp new file mode 100644 index 0000000000..56ab070080 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u32.cpp @@ -0,0 +1,40 @@ +//==------------ lsc_surf_prefetch_u32.cpp - DPC++ ESIMD on-device test ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 199; +constexpr lsc_data_size DS = lsc_data_size::u32; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false, DS, L1H, L3H, true>(1); + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true, DS, L1H, L3H, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true, DS, L1H, L3H, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true, DS, L1H, L3H, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_prefetch_u64.cpp b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u64.cpp new file mode 100644 index 0000000000..c8a7a70c69 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u64.cpp @@ -0,0 +1,40 @@ +//==------------ lsc_surf_prefetch_u64.cpp - DPC++ ESIMD on-device test -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 198; +constexpr lsc_data_size DS = lsc_data_size::u64; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint64_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint64_t, 1, 4, 32, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint64_t, 1, 4, 16, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint64_t, 1, 4, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint64_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(1); + passed &= test<5, uint64_t, 2, 1, 1, 1, false, DS, L1H, L3H, true>(1); + + // transpose + passed &= test<8, uint64_t, 1, 4, 1, 32, true, DS, L1H, L3H, true>(); + passed &= test<9, uint64_t, 2, 2, 1, 16, true, DS, L1H, L3H, true>(); + passed &= test<10, uint64_t, 4, 4, 1, 4, true, DS, L1H, L3H, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp new file mode 100644 index 0000000000..0b13ea06bf --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_surf_prefetch_u8u32.cpp @@ -0,0 +1,35 @@ +//==------------ lsc_surf_prefetch_u8u32.cpp - DPC++ ESIMD on-device test --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_surf_load.hpp" + +constexpr uint32_t seed = 196; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L3H, true>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp index 788ea1397f..3ec1cae1b9 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u16u32.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp index f7222d53d2..dc50e796c2 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u32.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -25,8 +25,9 @@ int main(void) { passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // passed &= test<6, uint32_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint32_t, 1, 4, 8, 3, false>(rand()); // transpose passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp index 7320733866..447238e95c 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u64.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -19,19 +19,20 @@ int main(void) { bool passed = true; // non transpose - passed &= test<0, uint32_t, 1, 4, 32, 1, false>(rand()); - passed &= test<1, uint32_t, 1, 4, 32, 2, false>(rand()); - passed &= test<2, uint32_t, 1, 4, 16, 2, false>(rand()); - passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); - passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); - passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + passed &= test<0, uint64_t, 1, 4, 32, 1, false>(rand()); + passed &= test<1, uint64_t, 1, 4, 32, 2, false>(rand()); + passed &= test<2, uint64_t, 1, 4, 16, 2, false>(rand()); + passed &= test<3, uint64_t, 1, 4, 4, 1, false>(rand()); + passed &= test<4, uint64_t, 1, 1, 1, 1, false>(1); + passed &= test<5, uint64_t, 2, 1, 1, 1, false>(1); + + // passed &= test<6, uint64_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint64_t, 1, 4, 8, 3, false>(rand()); // transpose - passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); - passed &= test<9, uint32_t, 2, 2, 1, 16, true>(); - passed &= test<10, uint32_t, 4, 4, 1, 4, true>(); + passed &= test<8, uint64_t, 1, 4, 1, 32, true>(); + passed &= test<9, uint64_t, 2, 2, 1, 16, true>(); + passed &= test<10, uint64_t, 4, 4, 1, 4, true>(); std::cout << (passed ? "Passed\n" : "FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp index c991dfa0d1..611e21470b 100644 --- a/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_surf_store_u8u32.cpp @@ -5,8 +5,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/ESIMD/lsc/lsc_flat_pvc.cpp b/SYCL/ESIMD/lsc/lsc_usm.cpp similarity index 81% rename from SYCL/ESIMD/lsc/lsc_flat_pvc.cpp rename to SYCL/ESIMD/lsc/lsc_usm.cpp index 9252a584e4..d9226af0fc 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_pvc.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm.cpp @@ -1,14 +1,12 @@ -//==------- lsc_flat_pvc.cpp - DPC++ ESIMD on-device test ------------------==// +//==------------ lsc_usm.cpp - DPC++ ESIMD on-device test ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -// This test checks 1d flat lsc intrinsics - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -18,10 +16,11 @@ #include #include #include -#include +#include int main() { using namespace cl::sycl; + using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; auto size = size_t{128}; auto constexpr SIMDSize = unsigned{4}; @@ -51,25 +50,25 @@ int main() { auto offsets = simd(id * SIMDSize * sizeof(int), sizeof(int)); auto pred = simd_mask(1); - auto add = simd(5); - auto compare = simd(id * SIMDSize, 1); + auto add = simd(5); + auto compare = simd(id * SIMDSize, 1); auto swap = compare * 2; lsc_prefetch(vec_0 + - offset); + cache_hint::cached, cache_hint::uncached>(vec_0 + + offset); auto data_0 = lsc_block_load(vec_0 + offset); lsc_block_store(vec_0 + offset, data_0 * 2); lsc_prefetch(vec_1, - offsets); + cache_hint::cached, cache_hint::uncached>(vec_1, + offsets); auto data_1 = lsc_gather(vec_1, offsets); lsc_scatter(vec_1, offsets, data_1 * 2); - lsc_atomic_update(vec_2, offsets, pred); - lsc_atomic_update(vec_3, offsets, add, pred); - lsc_atomic_update(vec_4, offsets, compare, + lsc_atomic_update(vec_2, offsets, pred); + lsc_atomic_update(vec_3, offsets, add, pred); + lsc_atomic_update(vec_4, offsets, compare, swap, pred); }); }); @@ -97,6 +96,6 @@ int main() { sycl::free(vec_2, q); sycl::free(vec_3, q); sycl::free(vec_4, q); - std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; return error; } diff --git a/SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp b/SYCL/ESIMD/lsc/lsc_usm_2d.cpp similarity index 85% rename from SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp rename to SYCL/ESIMD/lsc/lsc_usm_2d.cpp index 681c731d09..daf3e9dac9 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_2d_pvc.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_2d.cpp @@ -1,14 +1,12 @@ -//==------- lsc_flat_2d_pvc.cpp - DPC++ ESIMD on-device test ---------------==// +//==------------ lsc_usm_2d.cpp - DPC++ ESIMD on-device test ---------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -// This test checks 2d flat lsc intrinsics - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -19,10 +17,11 @@ #include #include #include -#include +#include int main() { using namespace cl::sycl; + using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; unsigned data_height = 4; unsigned data_width = 9; @@ -59,15 +58,15 @@ int main() { q.submit([&](handler &h) { h.parallel_for( range<1>{1}, [=](id<1> id) SYCL_ESIMD_KERNEL { - lsc_prefetch2d( + lsc_prefetch2d( input, (data_width * sizeof(int)) - 1, data_height - 1, (data_pitch * sizeof(int)) - 1, x, y); auto data = lsc_load2d( input, (data_width * sizeof(int)) - 1, data_height - 1, (data_pitch * sizeof(int)) - 1, x, y); - lsc_store2d( block_store, (data_width * sizeof(int)) - 1, data_height - 1, (data_pitch * sizeof(int)) - 1, x, y, data); @@ -86,6 +85,6 @@ int main() { error += std::abs(ref[i] - block_store[i]); free(input, q); free(block_store, q); - std::cout << (error != 0 ? "FAILED" : "PASSED") << std::endl; + std::cout << (error != 0 ? "FAILED" : "passed") << std::endl; return error; } diff --git a/SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp b/SYCL/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp similarity index 88% rename from SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp rename to SYCL/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp index 01e709e0b9..6fb0a85628 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_atomic_cachehint_pvc.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_atomic_cachehint.cpp @@ -1,13 +1,13 @@ -//==---- lsc_flat_atomic_cachehint_pvc.cpp - DPC++ ESIMD on-device test ----==// +//==--- lsc_usm_atomic_cachehint.cpp - DPC++ ESIMD on-device test ----------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc -// RUN: %clangxx -fsycl %s -DESIMD_GEN12_7 -o %t.out +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out #include "../esimd_test_utils.hpp" @@ -15,13 +15,14 @@ #include #include #include -#include +#include class Test; #define DTYPE float using namespace cl::sycl; +using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental::esimd; ESIMD_INLINE void atomic_add_float(DTYPE *sA, simd_mask<16> M) { @@ -29,10 +30,8 @@ ESIMD_INLINE void atomic_add_float(DTYPE *sA, simd_mask<16> M) { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); simd mat({0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}); - lsc_atomic_update( + lsc_atomic_update( (float *)sA, offsets * sizeof(float), mat, M); } diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_load_u16u32.cpp similarity index 87% rename from SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp rename to SYCL/ESIMD/lsc/lsc_usm_load_u16u32.cpp index 1eaa79411c..1e79a8d5fe 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_load_u16u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_load_u16u32.cpp @@ -1,16 +1,16 @@ -//==------- lsc_flat_load_u16u32.cpp - DPC++ ESIMD on-device test ----------==// +//==------- lsc_usm_load_u16u32.cpp - DPC++ ESIMD on-device test -----------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_flat_load.hpp" +#include "Inputs/lsc_usm_load.hpp" constexpr uint32_t seed = 186; constexpr lsc_data_size DS = lsc_data_size::u16u32; diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_load_u32.cpp similarity index 81% rename from SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp rename to SYCL/ESIMD/lsc/lsc_usm_load_u32.cpp index 119519bcba..1a4560d437 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_load_u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_load_u32.cpp @@ -1,16 +1,16 @@ -//==------- lsc_flat_load_u32.cpp - DPC++ ESIMD on-device test -------------==// +//==------- lsc_usm_load_u32.cpp - DPC++ ESIMD on-device test --------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_flat_load.hpp" +#include "Inputs/lsc_usm_load.hpp" constexpr uint32_t seed = 188; @@ -25,8 +25,9 @@ int main(void) { passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // passed &= test<6, uint32_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint32_t, 1, 4, 8, 3, false>(rand()); // transpose passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); diff --git a/SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp b/SYCL/ESIMD/lsc/lsc_usm_load_u64.cpp similarity index 75% rename from SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp rename to SYCL/ESIMD/lsc/lsc_usm_load_u64.cpp index 4655a3d752..66d278af36 100644 --- a/SYCL/ESIMD/lsc/lsc_slm_load_u64.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_load_u64.cpp @@ -1,18 +1,18 @@ -//==------- lsc_slm_load_u64.cpp - DPC++ ESIMD on-device test --------------==// +//==------------ lsc_usm_load_u64.cpp - DPC++ ESIMD on-device test ---------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out -// RUNx: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_slm_load.hpp" +#include "Inputs/lsc_usm_load.hpp" -constexpr uint32_t seed = 176; +constexpr uint32_t seed = 187; int main(void) { srand(seed); @@ -25,8 +25,9 @@ int main(void) { passed &= test<3, uint64_t, 1, 4, 4, 1, false>(rand()); passed &= test<4, uint64_t, 1, 1, 1, 1, false>(1); passed &= test<5, uint64_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint64_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint64_t, 1, 4, 8, 3>(rand()); // exec fail + + // passed &= test<6, uint64_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint64_t, 1, 4, 8, 3, false>(rand()); // transpose passed &= test<8, uint64_t, 1, 4, 1, 32, true>(); diff --git a/SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_load_u8u32.cpp similarity index 87% rename from SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp rename to SYCL/ESIMD/lsc/lsc_usm_load_u8u32.cpp index 0259755c47..d19dd42deb 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_load_u8u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_load_u8u32.cpp @@ -1,16 +1,16 @@ -//==------- lsc_flat_load_u8u32.cpp - DPC++ ESIMD on-device test -----------==// +//==------- lsc_usm_load_u8u32.cpp - DPC++ ESIMD on-device test ------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_flat_load.hpp" +#include "Inputs/lsc_usm_load.hpp" constexpr uint32_t seed = 185; constexpr lsc_data_size DS = lsc_data_size::u8u32; diff --git a/SYCL/ESIMD/lsc/lsc_usm_prefetch_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u16u32.cpp new file mode 100644 index 0000000000..818e7647b2 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u16u32.cpp @@ -0,0 +1,35 @@ +//==------------ lsc_usm_prefetch_u16u32.cpp - DPC++ ESIMD on-device test --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_usm_load.hpp" + +constexpr uint32_t seed = 186; +constexpr lsc_data_size DS = lsc_data_size::u16u32; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L3H, true>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_usm_prefetch_u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u32.cpp new file mode 100644 index 0000000000..ae0ac9fde8 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u32.cpp @@ -0,0 +1,40 @@ +//==------------ lsc_usm_prefetch_u32.cpp - DPC++ ESIMD on-device test -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_usm_load.hpp" + +constexpr uint32_t seed = 188; +constexpr lsc_data_size DS = lsc_data_size::u32; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint32_t, 1, 4, 16, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint32_t, 1, 4, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(1); + passed &= test<5, uint32_t, 2, 1, 1, 1, false, DS, L1H, L3H, true>(1); + + // transpose + passed &= test<8, uint32_t, 1, 4, 1, 32, true, DS, L1H, L3H, true>(); + passed &= test<9, uint32_t, 2, 2, 1, 16, true, DS, L1H, L3H, true>(); + passed &= test<10, uint32_t, 4, 4, 1, 4, true, DS, L1H, L3H, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_usm_prefetch_u64.cpp b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u64.cpp new file mode 100644 index 0000000000..e63858d81a --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u64.cpp @@ -0,0 +1,40 @@ +//==------------ lsc_usm_prefetch_u64.cpp - DPC++ ESIMD on-device test -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_usm_load.hpp" + +constexpr uint32_t seed = 187; +constexpr lsc_data_size DS = lsc_data_size::u64; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non transpose + passed &= test<0, uint64_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint64_t, 1, 4, 32, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint64_t, 1, 4, 16, 2, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint64_t, 1, 4, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint64_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(1); + passed &= test<5, uint64_t, 2, 1, 1, 1, false, DS, L1H, L3H, true>(1); + + // transpose + passed &= test<8, uint64_t, 1, 4, 1, 32, true, DS, L1H, L3H, true>(); + passed &= test<9, uint64_t, 2, 2, 1, 16, true, DS, L1H, L3H, true>(); + passed &= test<10, uint64_t, 4, 4, 1, 4, true, DS, L1H, L3H, true>(); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_usm_prefetch_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u8u32.cpp new file mode 100644 index 0000000000..a6dacd8f50 --- /dev/null +++ b/SYCL/ESIMD/lsc/lsc_usm_prefetch_u8u32.cpp @@ -0,0 +1,35 @@ +//==------------ lsc_usm_prefetch_u8u32.cpp - DPC++ ESIMD on-device test ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include "Inputs/lsc_usm_load.hpp" + +constexpr uint32_t seed = 185; +constexpr lsc_data_size DS = lsc_data_size::u8u32; + +constexpr cache_hint L1H = cache_hint::cached; +constexpr cache_hint L3H = cache_hint::uncached; + +int main(void) { + srand(seed); + bool passed = true; + + // non-transpose + passed &= test<0, uint32_t, 1, 1, 1, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<1, uint32_t, 1, 4, 32, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<2, uint32_t, 2, 4, 16, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<3, uint32_t, 2, 2, 8, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<4, uint32_t, 4, 2, 4, 1, false, DS, L1H, L3H, true>(rand()); + passed &= test<5, uint32_t, 4, 16, 2, 1, false, DS, L1H, L3H, true>(rand()); + + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_store_u16u32.cpp similarity index 86% rename from SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp rename to SYCL/ESIMD/lsc/lsc_usm_store_u16u32.cpp index d4b3ba375a..382829133e 100644 --- a/SYCL/ESIMD/lsc/lsc_slm_store_u16u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_store_u16u32.cpp @@ -1,18 +1,18 @@ -//==------- lsc_slm_store_u16u32.cpp - DPC++ ESIMD on-device test ----------==// +//==------- lsc_usm_store_u16u32.cpp - DPC++ ESIMD on-device test ----------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_slm_store.hpp" +#include "Inputs/lsc_usm_store.hpp" -constexpr uint32_t seed = 275; +constexpr uint32_t seed = 286; constexpr lsc_data_size DS = lsc_data_size::u16u32; int main(void) { diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_store_u32.cpp similarity index 81% rename from SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp rename to SYCL/ESIMD/lsc/lsc_usm_store_u32.cpp index 781125b323..5d423efb0f 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_store_u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_store_u32.cpp @@ -1,16 +1,16 @@ -//==------- lsc_flat_store_u32.cpp - DPC++ ESIMD on-device test ------------==// +//==------- lsc_usm_store_u32.cpp - DPC++ ESIMD on-device test -------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_flat_store.hpp" +#include "Inputs/lsc_usm_store.hpp" constexpr uint32_t seed = 288; @@ -25,8 +25,9 @@ int main(void) { passed &= test<3, uint32_t, 1, 4, 4, 1, false>(rand()); passed &= test<4, uint32_t, 1, 1, 1, 1, false>(1); passed &= test<5, uint32_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint32_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint32_t, 1, 4, 8, 3>(rand()); // exec fail + + // passed &= test<6, uint32_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint32_t, 1, 4, 8, 3, false>(rand()); // transpose passed &= test<8, uint32_t, 1, 4, 1, 32, true>(); diff --git a/SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp b/SYCL/ESIMD/lsc/lsc_usm_store_u64.cpp similarity index 79% rename from SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp rename to SYCL/ESIMD/lsc/lsc_usm_store_u64.cpp index a40cfacf05..50e7f7e9ed 100644 --- a/SYCL/ESIMD/lsc/lsc_slm_store_u64.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_store_u64.cpp @@ -1,18 +1,18 @@ -//==------- lsc_slm_store_u64.cpp - DPC++ ESIMD on-device test -------------==// +//==------- lsc_usm_store_u64.cpp - DPC++ ESIMD on-device test -------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_slm_store.hpp" +#include "Inputs/lsc_usm_store.hpp" -constexpr uint32_t seed = 276; +constexpr uint32_t seed = 287; int main(void) { srand(seed); @@ -25,8 +25,9 @@ int main(void) { passed &= test<3, uint64_t, 1, 4, 4, 1, false>(rand()); passed &= test<4, uint64_t, 1, 1, 1, 1, false>(1); passed &= test<5, uint64_t, 2, 1, 1, 1, false>(1); - // passed &= test<6, uint64_t, 1, 4, 8, 2>(rand()); // merge fail - // passed &= test<7, uint64_t, 1, 4, 8, 3>(rand()); // exec fail + + // passed &= test<6, uint64_t, 1, 4, 8, 2, false>(rand()); + // passed &= test<7, uint64_t, 1, 4, 8, 3, false>(rand()); // transpose passed &= test<8, uint64_t, 1, 4, 1, 32, true>(); diff --git a/SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp b/SYCL/ESIMD/lsc/lsc_usm_store_u8u32.cpp similarity index 87% rename from SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp rename to SYCL/ESIMD/lsc/lsc_usm_store_u8u32.cpp index 22c81a7e05..69a104e00d 100644 --- a/SYCL/ESIMD/lsc/lsc_flat_store_u8u32.cpp +++ b/SYCL/ESIMD/lsc/lsc_usm_store_u8u32.cpp @@ -1,16 +1,16 @@ -//==------- lsc_flat_store_u8u32.cpp - DPC++ ESIMD on-device test ----------==// +//==------- lsc_usm_store_u8u32.cpp - DPC++ ESIMD on-device test -----------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - // REQUIRES: gpu-intel-pvc +// UNSUPPORTED: cuda || hip // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -#include "Inputs/lsc_flat_store.hpp" +#include "Inputs/lsc_usm_store.hpp" constexpr uint32_t seed = 285; constexpr lsc_data_size DS = lsc_data_size::u8u32;