-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[Clang][SME] Add missing IsStreamingCompatible flag to svget, svcreate & svset #78430
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Kerry McLaughlin (kmclaughlin-arm) ChangesPatch is 110.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78430.diff 25 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 02039524c41dc5..51cf8e29129c69 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1281,9 +1281,9 @@ def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", [I
def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
-def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>;
-def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>;
-def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>;
+def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
+def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
+def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
let TargetGuard = "sve,bf16" in {
def SVUNDEF_1_BF16 : SInst<"svundef_{d}", "dv", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
@@ -1291,9 +1291,9 @@ def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2v", "b", MergeNone, "", [IsUndef, I
def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
-def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate]>;
-def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate]>;
-def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate]>;
+def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
+def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
+def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
}
let TargetGuard = "sve2p1|sme2" in {
@@ -1303,22 +1303,22 @@ let TargetGuard = "sve2p1|sme2" in {
////////////////////////////////////////////////////////////////////////////////
// Vector insertion and extraction
-def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
-def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>;
-def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
+def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
+def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
+def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
-def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>;
-def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
-def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
+def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
+def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
+def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
let TargetGuard = "sve,bf16" in {
-def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
-def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>;
-def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
+def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
+def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
+def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
-def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>;
-def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
-def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
+def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
+def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
+def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
}
let TargetGuard = "sve2p1|sme2" in {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
index 4058af0051359c..1ed09cc5965fda 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
@@ -3,6 +3,7 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// REQUIRES: aarch64-registered-target
@@ -15,6 +16,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifndef TEST_SME
+#define ATTR
+#else
+#define ATTR __arm_streaming
+#endif
+
// CHECK-LABEL: @test_svcreate2_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[X0:%.*]], i64 0)
@@ -27,7 +34,7 @@
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[X1:%.*]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP1]]
//
-svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1)
+svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1);
}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
index 5051ab88d92260..1aead4e5572f33 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
@@ -3,6 +3,7 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// REQUIRES: aarch64-registered-target
@@ -15,6 +16,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifndef TEST_SME
+#define ATTR
+#else
+#define ATTR __arm_streaming
+#endif
+
// CHECK-LABEL: @test_svcreate2_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
@@ -27,7 +34,7 @@
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP1]]
//
-svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1)
+svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_s8,,)(x0, x1);
}
@@ -44,7 +51,7 @@ svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP1]]
//
-svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1)
+svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_s16,,)(x0, x1);
}
@@ -61,7 +68,7 @@ svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP1]]
//
-svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1)
+svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_s32,,)(x0, x1);
}
@@ -78,7 +85,7 @@ svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP1]]
//
-svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1)
+svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_s64,,)(x0, x1);
}
@@ -95,7 +102,7 @@ svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP1]]
//
-svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1)
+svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_u8,,)(x0, x1);
}
@@ -112,7 +119,7 @@ svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP1]]
//
-svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1)
+svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_u16,,)(x0, x1);
}
@@ -129,7 +136,7 @@ svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP1]]
//
-svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1)
+svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_u32,,)(x0, x1);
}
@@ -146,7 +153,7 @@ svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP1]]
//
-svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1)
+svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_u64,,)(x0, x1);
}
@@ -163,7 +170,7 @@ svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP0]], <vscale x 8 x half> [[X1:%.*]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP1]]
//
-svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1)
+svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_f16,,)(x0, x1);
}
@@ -180,7 +187,7 @@ svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP0]], <vscale x 4 x float> [[X1:%.*]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP1]]
//
-svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1)
+svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_f32,,)(x0, x1);
}
@@ -197,7 +204,7 @@ svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP0]], <vscale x 2 x double> [[X1:%.*]], i64 2)
// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP1]]
//
-svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1)
+svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) ATTR
{
return SVE_ACLE_FUNC(svcreate2,_f64,,)(x0, x1);
}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
index 85eff362411638..90176ff0dbd481 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
@@ -3,6 +3,7 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// REQUIRES: aarch64-registered-target
@@ -15,6 +16,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifndef TEST_SME
+#define ATTR
+#else
+#define ATTR __arm_streaming
+#endif
+
// CHECK-LABEL: @test_svcreate3_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[X0:%.*]], i64 0)
@@ -29,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[X2:%.*]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 24 x bfloat> [[TMP2]]
//
-svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2)
+svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR
{
return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2);
}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
index 3d6f34d5234cf3..2fe1a88441b290 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
@@ -3,6 +3,7 @@
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// REQUIRES: aarch64-registered-target
@@ -15,6 +16,12 @@
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif
+#ifndef TEST_SME
+#define ATTR
+#else
+#define ATTR __arm_streaming
+#endif
+
// CHECK-LABEL: @test_svcreate3_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
@@ -29,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], i64 32)
// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP2]]
//
-svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2)
+svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) ATTR
{
return SVE_ACLE_FUNC(svcreate3,_s8,,)(x0, x1, x2);
}
@@ -48,7 +55,7 @@ svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP2]]
//
-svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2)
+svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) ATTR
{
return SVE_ACLE_FUNC(svcreate3,_s16,,)(x0, x1, x2);
}
@@ -67,7 +74,7 @@ svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP2]]
//
-svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2)
+svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) ATTR
{
return SVE_ACLE_FUNC(svcreate3,_s32,,)(x0, x1, x2);
}
@@ -86,7 +93,7 @@ svint32x3_t t...
[truncated]
|
sdesmalen-arm
approved these changes
Jan 17, 2024
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
ampandey-1995
pushed a commit
to ampandey-1995/llvm-project
that referenced
this pull request
Jan 19, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
clang:codegen
IR generation bugs: mangling, exceptions, etc.
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang
Clang issues not falling into any other category
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.