Skip to content

Commit e75720b

Browse files
[Clang][SME] Add missing IsStreamingCompatible flag to svget, svcreate & svset (#78430)
1 parent 49e3e75 commit e75720b

25 files changed

+326
-168
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,19 +1281,19 @@ def SVUNDEF_2 : SInst<"svundef2_{d}", "2v", "csilUcUsUiUlhfd", MergeNone, "", [I
12811281
def SVUNDEF_3 : SInst<"svundef3_{d}", "3v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
12821282
def SVUNDEF_4 : SInst<"svundef4_{d}", "4v", "csilUcUsUiUlhfd", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
12831283

1284-
def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>;
1285-
def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>;
1286-
def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate]>;
1284+
def SVCREATE_2 : SInst<"svcreate2[_{d}]", "2dd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
1285+
def SVCREATE_3 : SInst<"svcreate3[_{d}]", "3ddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
1286+
def SVCREATE_4 : SInst<"svcreate4[_{d}]", "4dddd", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
12871287

12881288
let TargetGuard = "sve,bf16" in {
12891289
def SVUNDEF_1_BF16 : SInst<"svundef_{d}", "dv", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
12901290
def SVUNDEF_2_BF16 : SInst<"svundef2_{d}", "2v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
12911291
def SVUNDEF_3_BF16 : SInst<"svundef3_{d}", "3v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
12921292
def SVUNDEF_4_BF16 : SInst<"svundef4_{d}", "4v", "b", MergeNone, "", [IsUndef, IsStreamingCompatible]>;
12931293

1294-
def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate]>;
1295-
def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate]>;
1296-
def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate]>;
1294+
def SVCREATE_2_BF16 : SInst<"svcreate2[_{d}]", "2dd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
1295+
def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
1296+
def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
12971297
}
12981298

12991299
let TargetGuard = "sve2p1|sme2" in {
@@ -1303,22 +1303,22 @@ let TargetGuard = "sve2p1|sme2" in {
13031303

13041304
////////////////////////////////////////////////////////////////////////////////
13051305
// Vector insertion and extraction
1306-
def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
1307-
def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>;
1308-
def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
1306+
def SVGET_2 : SInst<"svget2[_{d}]", "d2i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
1307+
def SVGET_3 : SInst<"svget3[_{d}]", "d3i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
1308+
def SVGET_4 : SInst<"svget4[_{d}]", "d4i", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
13091309

1310-
def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>;
1311-
def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
1312-
def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
1310+
def SVSET_2 : SInst<"svset2[_{d}]", "22id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
1311+
def SVSET_3 : SInst<"svset3[_{d}]", "33id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
1312+
def SVSET_4 : SInst<"svset4[_{d}]", "44id", "csilUcUsUiUlhfd", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
13131313

13141314
let TargetGuard = "sve,bf16" in {
1315-
def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
1316-
def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_2>]>;
1317-
def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
1315+
def SVGET_2_BF16 : SInst<"svget2[_{d}]", "d2i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
1316+
def SVGET_3_BF16 : SInst<"svget3[_{d}]", "d3i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
1317+
def SVGET_4_BF16 : SInst<"svget4[_{d}]", "d4i", "b", MergeNone, "", [IsTupleGet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
13181318

1319-
def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_1>]>;
1320-
def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_2>]>;
1321-
def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
1319+
def SVSET_2_BF16 : SInst<"svset2[_{d}]", "22id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
1320+
def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_2>]>;
1321+
def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet, IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
13221322
}
13231323

13241324
let TargetGuard = "sve2p1|sme2" in {

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
44
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
55
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
67

78
// REQUIRES: aarch64-registered-target
89

@@ -15,6 +16,12 @@
1516
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
1617
#endif
1718

19+
#ifndef TEST_SME
20+
#define ATTR
21+
#else
22+
#define ATTR __arm_streaming
23+
#endif
24+
1825
// CHECK-LABEL: @test_svcreate2_bf16(
1926
// CHECK-NEXT: entry:
2027
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[X0:%.*]], i64 0)
@@ -27,7 +34,7 @@
2734
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[X1:%.*]], i64 8)
2835
// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP1]]
2936
//
30-
svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1)
37+
svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR
3138
{
3239
return SVE_ACLE_FUNC(svcreate2,_bf16,,)(x0, x1);
3340
}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
44
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
55
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
67

78
// REQUIRES: aarch64-registered-target
89

@@ -15,6 +16,12 @@
1516
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
1617
#endif
1718

19+
#ifndef TEST_SME
20+
#define ATTR
21+
#else
22+
#define ATTR __arm_streaming
23+
#endif
24+
1825
// CHECK-LABEL: @test_svcreate2_s8(
1926
// CHECK-NEXT: entry:
2027
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
@@ -27,7 +34,7 @@
2734
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
2835
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP1]]
2936
//
30-
svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1)
37+
svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) ATTR
3138
{
3239
return SVE_ACLE_FUNC(svcreate2,_s8,,)(x0, x1);
3340
}
@@ -44,7 +51,7 @@ svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1)
4451
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
4552
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP1]]
4653
//
47-
svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1)
54+
svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) ATTR
4855
{
4956
return SVE_ACLE_FUNC(svcreate2,_s16,,)(x0, x1);
5057
}
@@ -61,7 +68,7 @@ svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1)
6168
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
6269
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP1]]
6370
//
64-
svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1)
71+
svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) ATTR
6572
{
6673
return SVE_ACLE_FUNC(svcreate2,_s32,,)(x0, x1);
6774
}
@@ -78,7 +85,7 @@ svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1)
7885
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
7986
// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP1]]
8087
//
81-
svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1)
88+
svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) ATTR
8289
{
8390
return SVE_ACLE_FUNC(svcreate2,_s64,,)(x0, x1);
8491
}
@@ -95,7 +102,7 @@ svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1)
95102
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
96103
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP1]]
97104
//
98-
svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1)
105+
svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) ATTR
99106
{
100107
return SVE_ACLE_FUNC(svcreate2,_u8,,)(x0, x1);
101108
}
@@ -112,7 +119,7 @@ svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1)
112119
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
113120
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP1]]
114121
//
115-
svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1)
122+
svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) ATTR
116123
{
117124
return SVE_ACLE_FUNC(svcreate2,_u16,,)(x0, x1);
118125
}
@@ -129,7 +136,7 @@ svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1)
129136
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
130137
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP1]]
131138
//
132-
svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1)
139+
svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) ATTR
133140
{
134141
return SVE_ACLE_FUNC(svcreate2,_u32,,)(x0, x1);
135142
}
@@ -146,7 +153,7 @@ svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1)
146153
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
147154
// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP1]]
148155
//
149-
svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1)
156+
svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) ATTR
150157
{
151158
return SVE_ACLE_FUNC(svcreate2,_u64,,)(x0, x1);
152159
}
@@ -163,7 +170,7 @@ svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1)
163170
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP0]], <vscale x 8 x half> [[X1:%.*]], i64 8)
164171
// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP1]]
165172
//
166-
svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1)
173+
svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) ATTR
167174
{
168175
return SVE_ACLE_FUNC(svcreate2,_f16,,)(x0, x1);
169176
}
@@ -180,7 +187,7 @@ svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1)
180187
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP0]], <vscale x 4 x float> [[X1:%.*]], i64 4)
181188
// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP1]]
182189
//
183-
svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1)
190+
svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) ATTR
184191
{
185192
return SVE_ACLE_FUNC(svcreate2,_f32,,)(x0, x1);
186193
}
@@ -197,7 +204,7 @@ svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1)
197204
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP0]], <vscale x 2 x double> [[X1:%.*]], i64 2)
198205
// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP1]]
199206
//
200-
svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1)
207+
svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) ATTR
201208
{
202209
return SVE_ACLE_FUNC(svcreate2,_f64,,)(x0, x1);
203210
}

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
44
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
55
// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -DTEST_SME -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
67

78
// REQUIRES: aarch64-registered-target
89

@@ -15,6 +16,12 @@
1516
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
1617
#endif
1718

19+
#ifndef TEST_SME
20+
#define ATTR
21+
#else
22+
#define ATTR __arm_streaming
23+
#endif
24+
1825
// CHECK-LABEL: @test_svcreate3_bf16(
1926
// CHECK-NEXT: entry:
2027
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[X0:%.*]], i64 0)
@@ -29,7 +36,7 @@
2936
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[X2:%.*]], i64 16)
3037
// CPP-CHECK-NEXT: ret <vscale x 24 x bfloat> [[TMP2]]
3138
//
32-
svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2)
39+
svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR
3340
{
3441
return SVE_ACLE_FUNC(svcreate3,_bf16,,)(x0, x1, x2);
3542
}

0 commit comments

Comments
 (0)