diff --git a/sycl/test/check_device_code/vector/vector_as.cpp b/sycl/test/check_device_code/vector/vector_as.cpp index f42fb56b58791..32648ecbf168b 100644 --- a/sycl/test/check_device_code/vector/vector_as.cpp +++ b/sycl/test/check_device_code/vector/vector_as.cpp @@ -3,9 +3,6 @@ // NOTE: removed/disabled to re-generate the checks. // RUN: %clangxx -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s -// RUN: %if preview-breaking-changes-supported %{ \ -// RUN: %clangxx -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s -fpreview-breaking-changes | FileCheck %s --check-prefix=CHECK-PREVIEW \ -// RUN: %} // Windows/linux have some slight differences in IR generation (function // arguments passing and long/long long differences/mangling) that could @@ -21,10 +18,3 @@ template SYCL_EXTERNAL sycl::vec sycl::vec::as + +template SYCL_EXTERNAL sycl::vec +sycl::vec::as>() const; +// CHECK-LABEL: define weak_odr dso_local spir_func void @_ZNK4sycl3_V13vecIfLi4EE2asINS1_IiLi4EEEEET_v( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false) +// CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp index 08c6c6882e43c..d168058306afb 100644 --- a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp @@ -3,7 +3,7 @@ // Had to increase inline threashold for this test to force inline of the vec<> // math builtins. -// RUN: %clangxx -I %sycl_include -fpreview-breaking-changes -mllvm -inline-threshold=400 -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -O3 -fsycl-device-only %s -o - | FileCheck %s +// RUN: %clangxx -I %sycl_include -mllvm -inline-threshold=400 -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -O3 -fsycl-device-only %s -o - | FileCheck %s // Windows/linux have some slight differences in IR generation (function // arguments passing and long/long long differences/mangling) that could diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp new file mode 100644 index 0000000000000..5c27c66fda0ad --- /dev/null +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins_preview.cpp @@ -0,0 +1,282 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals none --version 4 +// NOTE: ..., followed by some manual cleanup. + +// Had to increase inline threashold for this test to force inline of the vec<> +// math builtins. +// RUN: %clangxx -I %sycl_include -fpreview-breaking-changes -mllvm -inline-threshold=400 -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -O3 -fsycl-device-only %s -o - | FileCheck %s + +// Windows/linux have some slight differences in IR generation (function +// arguments passing and long/long long differences/mangling) that could +// complicate test updates while not improving test coverage. Limiting to linux +// should be fine. +// REQUIRES: linux && preview-breaking-changes-supported + +// This test checks the device code generated for vec math builtins. +#include +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi; +using namespace sycl::ext::oneapi::experimental; + +// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMinN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi2EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I8_I:%.*]] = alloca <2 x float>, align 8 +// CHECK-NEXT: [[DST_I_I_I_I9_I:%.*]] = alloca [2 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <2 x i16>, align 4 +// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [2 x float], align 4 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <2 x i16>, align 4 +// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [2 x float], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[A]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[B]], align 4 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META11:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META11]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: store <2 x i16> [[TMP0]], ptr [[VEC_ADDR_I_I_I_I_I]], align 4, !tbaa [[TBAA14:![0-9]+]], !noalias [[META11]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5:[0-9]+]], !noalias [[META11]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META11]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META11]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META11]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META17:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META17]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I4_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) +// CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 4, !tbaa [[TBAA14]], !noalias [[META17]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META17]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !tbaa [[TBAA14]], !noalias [[META17]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META17]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META17]] +// CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <2 x float> @_Z16__spirv_ocl_fminDv2_fS_(<2 x float> noundef [[TMP2]], <2 x float> noundef [[TMP3]]) #[[ATTR6:[0-9]+]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META23:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META23]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I10_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I8_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I11_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I9_I]] to ptr addrspace(4) +// CHECK-NEXT: store <2 x float> [[CALL2_I_I]], ptr [[VEC_ADDR_I_I_I_I8_I]], align 8, !tbaa [[TBAA14]], !noalias [[META23]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I10_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I11_I]]) #[[ATTR5]], !noalias [[META23]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DST_I_I_I_I9_I]], align 2, !tbaa [[TBAA14]], !noalias [[META23]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META23]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META23]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META23]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestFMin(vec a, vec b) { + return experimental::fmin(a, b); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.71") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 +// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <3 x i16>, align 8 +// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 +// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[B]], align 8 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META28]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META28]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I6_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I7_I:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa [[TBAA14]], !noalias [[META31]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META31]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]] +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <3 x float> @_Z16__spirv_ocl_fmaxDv3_fS_(<3 x float> noundef [[EXTRACTVEC_I_I_I_I]], <3 x float> noundef [[EXTRACTVEC_I_I4_I_I]]) #[[ATTR6]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I15_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I12_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I16_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I13_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I17_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I17_I]], ptr [[VEC_ADDR_I_I_I_I12_I]], align 16, !tbaa [[TBAA14]], !noalias [[META37]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I15_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I16_I]]) #[[ATTR5]], !noalias [[META37]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] +// CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestFMax(vec a, vec b) { + return experimental::fmax(a, b); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.149") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.188") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 +// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 8 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META42:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META42]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META42]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META42]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META42]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META42]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META42]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef <4 x i8> @_Z13__spirv_IsNanDv4_f(<4 x float> noundef [[TMP1]]) #[[ATTR6]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef <4 x i32> @_Z22__spirv_SConvert_Rint4Dv4_a(<4 x i8> noundef [[CALL_I_I_I_I]]) #[[ATTR6]] +// CHECK-NEXT: [[CALL_I_I_I2_I:%.*]] = call spir_func noundef <4 x i16> @_Z24__spirv_SConvert_Rshort4Dv4_i(<4 x i32> noundef [[CALL_I_I_I_I_I_I]]) #[[ATTR6]] +// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestIsNan(vec a) { + return experimental::isnan(a); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [8 x float], align 4 +// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <8 x i16>, ptr [[A]], align 16 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META52:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META52]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: store <8 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 16, !tbaa [[TBAA14]], !noalias [[META52]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META52]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META52]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META52]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META52]] +// CHECK-NEXT: [[CALL1_I_I:%.*]] = call spir_func noundef <8 x float> @_Z16__spirv_ocl_fabsDv8_f(<8 x float> noundef [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META58:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META58]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I4_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) +// CHECK-NEXT: store <8 x float> [[CALL1_I_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 32, !tbaa [[TBAA14]], !noalias [[META58]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META58]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META58]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META58]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META58]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META58]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestFabs(vec a) { + return experimental::fabs(a); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 +// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <8 x i16>, align 16 +// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [8 x float], align 4 +// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <8 x i16>, ptr [[A]], align 16 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META63:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META63]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: store <8 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 16, !tbaa [[TBAA14]], !noalias [[META63]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META63]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META63]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META63]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META63]] +// CHECK-NEXT: [[CALL1_I_I:%.*]] = call spir_func noundef <8 x float> @_Z16__spirv_ocl_ceilDv8_f(<8 x float> noundef [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META69:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META69]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I4_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) +// CHECK-NEXT: store <8 x float> [[CALL1_I_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 32, !tbaa [[TBAA14]], !noalias [[META69]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META69]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META69]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META69]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META69]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META69]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestCeil(vec a) { + return experimental::ceil(a); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.420") align 32 initializes((0, 32)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 +// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I7_I:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: [[DST_I_I_I_I8_I:%.*]] = alloca [16 x float], align 4 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I3_I:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: [[DST_I_I_I_I4_I:%.*]] = alloca [16 x float], align 4 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <16 x i16>, align 32 +// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [16 x float], align 4 +// CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i16>, ptr [[A]], align 32 +// CHECK-NEXT: [[AGG_TMP1_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i16>, ptr [[B]], align 32 +// CHECK-NEXT: [[AGG_TMP2_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i16>, ptr [[C]], align 32 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META74:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META74]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: store <16 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 32, !tbaa [[TBAA14]], !noalias [[META74]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META74]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META74]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META74]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META74]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I3_I]]), !noalias [[META77:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I4_I]]), !noalias [[META77]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I3_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I6_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I4_I]] to ptr addrspace(4) +// CHECK-NEXT: store <16 x i16> [[AGG_TMP1_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I3_I]], align 32, !tbaa [[TBAA14]], !noalias [[META77]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META77]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I4_I]], align 4, !tbaa [[TBAA14]], !noalias [[META77]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I3_I]]), !noalias [[META77]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I4_I]]), !noalias [[META77]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I7_I]]), !noalias [[META80:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I8_I]]), !noalias [[META80]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I9_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I7_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I10_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I8_I]] to ptr addrspace(4) +// CHECK-NEXT: store <16 x i16> [[AGG_TMP2_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I7_I]], align 32, !tbaa [[TBAA14]], !noalias [[META80]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I9_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I10_I]]) #[[ATTR5]], !noalias [[META80]] +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I8_I]], align 4, !tbaa [[TBAA14]], !noalias [[META80]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I7_I]]), !noalias [[META80]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I8_I]]), !noalias [[META80]] +// CHECK-NEXT: [[CALL3_I_I:%.*]] = call spir_func noundef <16 x float> @_Z15__spirv_ocl_fmaDv16_fS_S_(<16 x float> noundef [[TMP0]], <16 x float> noundef [[TMP1]], <16 x float> noundef [[TMP2]]) #[[ATTR6]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META86:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META86]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I16_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I14_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I17_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I15_I]] to ptr addrspace(4) +// CHECK-NEXT: store <16 x float> [[CALL3_I_I]], ptr [[VEC_ADDR_I_I_I_I14_I]], align 64, !tbaa [[TBAA14]], !noalias [[META86]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I16_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I17_I]]) #[[ATTR5]], !noalias [[META86]] +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr [[DST_I_I_I_I15_I]], align 2, !tbaa [[TBAA14]], !noalias [[META86]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META86]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META86]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr addrspace(4) [[AGG_RESULT]], align 32, !alias.scope [[META86]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestFMA(vec a, vec b, + vec c) { + return experimental::fma(a, b, c); +} diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index 7fdb4a02b39b0..f7880e90d1583 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals none --version 4 // NOTE: ..., followed by some manual cleanup. -// RUN: %clangxx -I %sycl_include -fpreview-breaking-changes -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers %s -fsycl-device-only -o - | FileCheck %s +// RUN: %clangxx -I %sycl_include -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers %s -fsycl-device-only -o - | FileCheck %s // REQUIRES: linux #include diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp new file mode 100644 index 0000000000000..436852b841577 --- /dev/null +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat_preview.cpp @@ -0,0 +1,220 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals none --version 4 +// NOTE: ..., followed by some manual cleanup. + +// RUN: %clangxx -I %sycl_include -fpreview-breaking-changes -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers %s -fsycl-device-only -o - | FileCheck %s +// REQUIRES: linux && preview-breaking-changes-supported + +#include +#include +#include +#include + +using namespace sycl; +using bfloat16 = sycl::ext::oneapi::bfloat16; + +// CHECK-LABEL: define dso_local spir_func void @_Z18TestBFtoFDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 +// CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META8]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11:![0-9]+]], !noalias [[META8]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4:[0-9]+]], !noalias [[META8]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META8]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z17TestBFtoFDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META14:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 +// CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META15]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11]], !noalias [[META15]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META15]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META15]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.71") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <3 x i32> +// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i32> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EE7CONVERTIILNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rz(i16 noundef zeroext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META19]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], i32 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EE7convertIiLNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.110") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META25]] +// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META25]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z18TestFtoBFDeviceRNERN4sycl3_V13vecIfLi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META28:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 +// CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA11]], !noalias [[META29]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META29]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META29]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META32:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <3 x i32> +// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIFLI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x float> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META33]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 16 dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[RETVAL1_SROA_0_0_I_I_I:%.*]] = phi <3 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIILI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i32> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META38]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: +// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.149") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META43]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META43]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { + return inp.template convert(); +} + +// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.229") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] +// CHECK: for.cond.i.i.i: +// CHECK-NEXT: [[RETVAL1_0_I_I_I:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] +// CHECK-NEXT: [[I_0_I_I_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_I_I_I:%.*]], [[FOR_BODY_I_I_I]] ] +// CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp samesign ult i32 [[I_0_I_I_I]], 2 +// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECISLI2EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE4EEENS1_IT_LI2EEEV_EXIT:%.*]] +// CHECK: for.body.i.i.i: +// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <2 x i16> [[TMP0]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_short2bfloat16_rd(i16 noundef signext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META49]] +// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <2 x i16> [[RETVAL1_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] +// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK: _ZNK4sycl3_V13vecIsLi2EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE4EEENS1_IT_Li2EEEv.exit: +// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META49]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestShorttoBFDeviceRTN(vec &inp) { + return inp.template convert(); +} diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 6a90aeef73f6d..985aac0d084d7 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // NOTE: ..., followed by some manual cleanup. -// RUN: %clangxx -I %sycl_include -fpreview-breaking-changes -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s +// RUN: %clangxx -I %sycl_include -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s // Windows/linux have some slight differences in IR generation (function // arguments passing and long/long long differences/mangling) that could diff --git a/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp b/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp new file mode 100644 index 0000000000000..708d553220964 --- /dev/null +++ b/sycl/test/check_device_code/vector/vector_math_ops_preview.cpp @@ -0,0 +1,391 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// NOTE: ..., followed by some manual cleanup. + +// RUN: %clangxx -I %sycl_include -fpreview-breaking-changes -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -disable-lifetime-markers -fsycl-device-only %s -o - | FileCheck %s + +// Windows/linux have some slight differences in IR generation (function +// arguments passing and long/long long differences/mangling) that could +// complicate test updates while not improving test coverage. Limiting to linux +// should be fine. +// REQUIRES: linux && preview-breaking-changes-supported + +// This test checks +// (1) the storage type of sycl::vec on device for all data types, and +// (2) the device code for various math operations on sycl::vec. +#include + +using namespace sycl; + +/*************** Binary Arithmetic Ops ******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIiLi2EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA14:![0-9]+]], !noalias [[META17:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META17]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.33") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META28]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.74") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META36:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META36]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META36]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } + +// std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. +// CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.115") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.115") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.115") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META44:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META44]] +// CHECK-NEXT: [[XOR_I_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestXor(vec a, vec b) { + return a ^ b; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.126") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META55:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA14]], !noalias [[META55]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP1]] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne <4 x i8> [[TMP0]], [[ADD_I_I_I_I_I]] +// CHECK-NEXT: [[SEXT_NEG_I_I:%.*]] = zext <4 x i1> [[CMP_I_I]] to <4 x i8> +// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.167") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.167") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.167") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } + +// CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.208") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.208", align 8 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META79:![0-9]+]] +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST4PLUSIVET_EENS0_3VECIS5_LI3EEEE4TYPEERKSB_SF__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] +// CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] +// CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86:![0-9]+]], !noalias [[META83]] +// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META83]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I_I_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt4plusIvET_EENS0_3vecIS5_Li3EEEE4typeERKSB_SF_.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META79]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META79]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestAdd(vec a, + vec b) { + return a + b; +} + +/***************** Binary Logical Ops *******************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.247") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.247") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.247") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA14]], !noalias [[META99:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META99]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <16 x i1> [[CMP_I_I_I_I]] to <16 x i32> +// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !alias.scope [[META99]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { + return a > b; +} + +// CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.289") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.289") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 +// CHECK-NEXT: [[LOADVECN_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVECN_I_I]], [[LOADVECN_I_I2]] +// CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <3 x i32> +// CHECK-NEXT: [[SEXT:%.*]] = sext <3 x i1> [[CMP]] to <3 x i8> +// CHECK-NEXT: ret <3 x i8> [[SEXT]] +// +SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { + return a > b; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.298") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.337") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.337") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA14]], !noalias [[META108:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META108]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <2 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i8> +// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { + return a > b; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.380") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META119:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META119]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <8 x i1> [[CMP_I_I_I_I]] to <8 x i16> +// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META119]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { + return a > b; +} + +// CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.462") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.501") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.501") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.462", align 8 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META127:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST7GREATERIVET_EENS0_3VECISLI4EEEE4TYPEERKNSA_IS5_LI4EEESG__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I_I]] +// CHECK-NEXT: [[CONV6_I_I:%.*]] = sext i1 [[CMP_I_I_I_I_I]] to i16 +// CHECK-NEXT: [[ARRAYIDX_I_I_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I_I_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt7greaterIvET_EENS0_3vecIsLi4EEEE4typeERKNSA_IS5_Li4EEESG_.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META127]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META127]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestGreaterThan(vec a, + vec b) { + return a > b; +} + +/********************** Unary Ops **********************/ + +// CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.539") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.539") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] +// CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I_I]], <4 x i32> poison, <3 x i32> +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META136]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.579") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.579") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META144:![0-9]+]] +// CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] +// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META144]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } + +// Negation is not valid for std::byte. Therefore, using bitwise negation. +// CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.618") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.618") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META152:![0-9]+]] +// CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) +// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META152]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.628") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.126") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META160:![0-9]+]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <4 x i1> [[CMP_I_I_I_I]] to <4 x i8> +// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.668") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.707") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META171:![0-9]+]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i16> +// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.419") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.419") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META182:![0-9]+]] +// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <8 x half> [[TMP0]] +// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META182]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.748") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.208") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.748", align 8 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META190:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META190]] +// CHECK-NEXT: [[TOBOOL_I_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I_I_I]], 0.000000e+00 +// CHECK-NEXT: [[CONV2_I_I:%.*]] = sext i1 [[TOBOOL_I_I_I]] to i16 +// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I_I_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META190]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META190]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } + +// CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.786") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.786") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.786", align 32 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]] +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 16 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] +// CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg float [[CALL_I_I_I_I]] +// CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86]], !noalias [[META202]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META202]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I_I_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193]] +// CHECK-NEXT: ret void +// +SYCL_EXTERNAL auto TestMinus(vec a) { return -a; }