Skip to content

Commit 5b0cd17

Browse files
[Clang][llvm] Implement fp8 FMOP4A intrinsics (#130127)
Implement all mf8 FMOP4A intrinsics in Clang and LLVM, following the ACLE specification in https://github.com/ARM-software/acle/pull/381/files. It also updates the previously added MOP4 intrinsics from IntrNoMem to IntrInaccessibleMemOnly.
1 parent 386ff11 commit 5b0cd17

File tree

7 files changed

+366
-18
lines changed

7 files changed

+366
-18
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,24 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in {
321321
defm SVBMOP4S_H : MOP4<"s", "_za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>;
322322
}
323323

324+
////////////////////////////////////////////////////////////////////////////////
325+
// SME2 - FP8 FMOP4A
326+
327+
// Defines the four FP8 svmop4a ACLE builtins (_1x1, _1x2, _2x1, _2x2) for one
// ZA suffix.
//   za     - suffix spliced into both the builtin name and the LLVM intrinsic
//            name (this file instantiates it with "_za32" and "_za16").
//   checks - immediate-range checks forwarded unchanged to every variant.
// All four variants use type spec "m", MergeNone, and the IsInOutZA and
// IsStreaming flags; they differ only in the NxM suffix and in the prototype
// string ("vidd>", "vid2>", "vi2d>", "vi22>").
// NOTE(review): judging by the tests for these builtins, 'd' vs '2' presumably
// selects a single vector vs a two-vector tuple for zn/zm, and '>' presumably
// adds the trailing fpm argument -- confirm against the prototype modifier docs.
multiclass MOP4_FP8<string za, list<ImmCheck> checks> {
328+
def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming], checks>;
329+
def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming], checks>;
330+
def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming], checks>;
331+
def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming], checks>;
332+
}
333+
334+
// za32 builtins: guarded by sme2 + sme-mop4 + sme-f8f32; argument 0 is
// range-checked with ImmCheck0_3.
let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in {
335+
defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", [ImmCheck<0, ImmCheck0_3>]>;
336+
}
337+
338+
// za16 builtins: guarded by sme2 + sme-mop4 + sme-f8f16; argument 0 is
// range-checked with ImmCheck0_1 (a narrower range than the za32 forms).
let SMETargetGuard = "sme2,sme-mop4,sme-f8f16" in {
339+
defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", [ImmCheck<0, ImmCheck0_1>]>;
340+
}
341+
324342
////////////////////////////////////////////////////////////////////////////////
325343
// SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S
326344

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
4+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
5+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
9+
10+
#include <arm_sme.h>
11+
12+
// SME_ACLE_FUNC builds the name of the builtin under test by token pasting.
// With SME_OVERLOADED_FORMS defined (the test invocations that pass
// -DSME_OVERLOADED_FORMS), only the 1st, 3rd and 5th pieces are pasted, giving
// the overloaded spelling (e.g. svmop4a_za16_fpm). Otherwise all five pieces
// are pasted, giving the fully-suffixed spelling
// (e.g. svmop4a_1x1_za16_mf8_mf8_fpm).
#ifdef SME_OVERLOADED_FORMS
13+
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
14+
#else
15+
#define SME_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
16+
#endif
17+
18+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za16_mf8_mf8_fpm(
19+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
20+
// CHECK-NEXT: [[ENTRY:.*:]]
21+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
22+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
23+
// CHECK-NEXT: ret void
24+
//
25+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za16_mf8_mf8_fpmu13__SVMfloat8_tS_m(
26+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
27+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
28+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
29+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
30+
// CPP-CHECK-NEXT: ret void
31+
//
32+
void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
33+
// 1x1, za16: zn and zm are both single svmfloat8_t vectors; immediate is 1.
// Per the autogenerated assertions above, fpmr is written via
// llvm.aarch64.set.fpmr before the za16 1x1 intrinsic call.
SME_ACLE_FUNC(svmop4a,_1x1,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
34+
}
35+
36+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za16_mf8_mf8_fpm(
37+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
38+
// CHECK-NEXT: [[ENTRY:.*:]]
39+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
40+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
41+
// CHECK-NEXT: ret void
42+
//
43+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za16_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm(
44+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
45+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
46+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
47+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
48+
// CPP-CHECK-NEXT: ret void
49+
//
50+
void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
51+
// 1x2, za16: zn is a single vector, zm is a two-vector tuple (svmfloat8x2_t)
// that the assertions above show being passed as two separate vectors.
SME_ACLE_FUNC(svmop4a,_1x2,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
52+
}
53+
54+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za16_mf8_mf8_fpm(
55+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
56+
// CHECK-NEXT: [[ENTRY:.*:]]
57+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
58+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
59+
// CHECK-NEXT: ret void
60+
//
61+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm(
62+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
63+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
64+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
65+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
66+
// CPP-CHECK-NEXT: ret void
67+
//
68+
void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
69+
// 2x1, za16: zn is a two-vector tuple (svmfloat8x2_t), zm is a single vector.
SME_ACLE_FUNC(svmop4a,_2x1,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
70+
}
71+
72+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za16_mf8_mf8_fpm(
73+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
74+
// CHECK-NEXT: [[ENTRY:.*:]]
75+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
76+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
77+
// CHECK-NEXT: ret void
78+
//
79+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za16_mf8_mf8_fpm13svmfloat8x2_tS_m(
80+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
81+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
82+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
83+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
84+
// CPP-CHECK-NEXT: ret void
85+
//
86+
void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
87+
// 2x2, za16: zn and zm are both two-vector tuples (svmfloat8x2_t), so the
// intrinsic call in the assertions above receives four vector operands.
SME_ACLE_FUNC(svmop4a,_2x2,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
88+
}
89+
90+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za32_mf8_mf8_fpm(
91+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
92+
// CHECK-NEXT: [[ENTRY:.*:]]
93+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
94+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
95+
// CHECK-NEXT: ret void
96+
//
97+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za32_mf8_mf8_fpmu13__SVMfloat8_tS_m(
98+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
99+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
100+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
101+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
102+
// CPP-CHECK-NEXT: ret void
103+
//
104+
void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
105+
// 1x1, za32: same operand shapes as the za16 1x1 test, but selects the
// za32 intrinsic.
SME_ACLE_FUNC(svmop4a,_1x1,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
106+
}
107+
108+
// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za32_mf8_mf8_fpm(
109+
// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
110+
// CHECK-NEXT: [[ENTRY:.*:]]
111+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
112+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
113+
// CHECK-NEXT: ret void
114+
//
115+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za32_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm(
116+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
117+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
118+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
119+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
120+
// CPP-CHECK-NEXT: ret void
121+
//
122+
void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
123+
// 1x2, za32: zn is a single vector, zm is a two-vector tuple (svmfloat8x2_t).
SME_ACLE_FUNC(svmop4a,_1x2,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
124+
}
125+
126+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za32_mf8_mf8_fpm(
127+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
128+
// CHECK-NEXT: [[ENTRY:.*:]]
129+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
130+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
131+
// CHECK-NEXT: ret void
132+
//
133+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm(
134+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
135+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
136+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
137+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
138+
// CPP-CHECK-NEXT: ret void
139+
//
140+
void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
141+
// 2x1, za32: zn is a two-vector tuple (svmfloat8x2_t), zm is a single vector.
SME_ACLE_FUNC(svmop4a,_2x1,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
142+
}
143+
144+
// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za32_mf8_mf8_fpm(
145+
// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
146+
// CHECK-NEXT: [[ENTRY:.*:]]
147+
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
148+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
149+
// CHECK-NEXT: ret void
150+
//
151+
// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za32_mf8_mf8_fpm13svmfloat8x2_tS_m(
152+
// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
153+
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
154+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
155+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
156+
// CPP-CHECK-NEXT: ret void
157+
//
158+
void test_svmop4a_2x2_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
159+
// 2x2, za32: zn and zm are both two-vector tuples (svmfloat8x2_t).
SME_ACLE_FUNC(svmop4a,_2x2,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr);
160+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
3+
// RUN: -target-feature +sme -target-feature +sme2p2 -target-feature +sme-mop4 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -fsyntax-only -verify %s
4+
5+
// REQUIRES: aarch64-registered-target
6+
7+
#include <arm_sme.h>
8+
9+
void tests_mop4_imm_1x1(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
10+
// Immediate-range diagnostics for the 1x1 forms: -1 must be rejected for
// both widths, with za16 reporting range [0, 1] and za32 reporting [0, 3].
svmop4a_1x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
11+
svmop4a_1x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
12+
return;
13+
}
14+
15+
void tests_mop4_imm_1x2(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
16+
// Immediate-range diagnostics for the 1x2 forms (zm is a two-vector tuple):
// -1 must be rejected, with za16 reporting [0, 1] and za32 reporting [0, 3].
svmop4a_1x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
17+
svmop4a_1x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
18+
return;
19+
}
20+
21+
void tests_mop4_imm_2x1(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
22+
// Immediate-range diagnostics for the 2x1 forms (zn is a two-vector tuple):
// -1 must be rejected, with za16 reporting [0, 1] and za32 reporting [0, 3].
svmop4a_2x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
23+
svmop4a_2x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
24+
return;
25+
}
26+
27+
void tests_mop4_imm_2x2(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") {
28+
// Immediate-range diagnostics for the 2x2 forms (zn and zm are both
// two-vector tuples): -1 must be rejected, with za16 reporting [0, 1] and
// za32 reporting [0, 3].
svmop4a_2x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
29+
svmop4a_2x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
30+
return;
31+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3068,22 +3068,22 @@ let TargetPrefix = "aarch64" in {
30683068
: DefaultAttrsIntrinsic<[],
30693069
[llvm_i32_ty,
30703070
llvm_anyvector_ty,
3071-
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
3071+
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
30723072

30733073
class SME_OuterProduct_QuarterTile_Single_Multi
30743074
: DefaultAttrsIntrinsic<[],
30753075
[llvm_i32_ty,
30763076
llvm_anyvector_ty,
30773077
LLVMMatchType<0>,
3078-
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
3078+
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
30793079

30803080
class SME_OuterProduct_QuarterTile_Multi_Multi
30813081
: DefaultAttrsIntrinsic<[],
30823082
[llvm_i32_ty,
30833083
llvm_anyvector_ty,
30843084
LLVMMatchType<0>,
30853085
LLVMMatchType<0>,
3086-
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
3086+
LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
30873087

30883088
// 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
30893089
foreach mode = ["s", "a"] in {
@@ -3125,6 +3125,14 @@ let TargetPrefix = "aarch64" in {
31253125
def int_aarch64_sme_sutmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
31263126
def int_aarch64_sme_ustmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
31273127

3128+
// 16-bit and 32-bit multi-vector FP8 (8-bit floating-point) Quarter Tile Quarter Product
3129+
// Declares int_aarch64_sme_fp8_fmop4a_{za16,za32}_{1x1,1x2,2x1,2x2}: eight
// intrinsics, four operand shapes for each of the two ZA suffixes.
foreach za = ["za16", "za32"] in {
3130+
def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
3131+
// 1x2 and 2x1 both use the Single_Multi class: each takes the i32 immediate
// plus three vectors of the same type, so the signatures are identical.
def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
3132+
def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi;
3133+
def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi;
3134+
}
3135+
31283136
class SME_AddVectorToTile_Intrinsic
31293137
: DefaultAttrsIntrinsic<[],
31303138
[llvm_i32_ty,

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,8 +1091,8 @@ let Predicates = [HasSME2p2] in {
10911091

10921092
} // [HasSME2p2]
10931093

1094-
let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in {
1095-
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">;
1094+
let Predicates = [HasSME_MOP4, HasSMEF8F16] in {
1095+
defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za16">;
10961096
}
10971097

10981098
let Predicates = [HasSME_MOP4, HasSMEF16F16] in {
@@ -1105,10 +1105,8 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in {
11051105
defm BFMUL : sme2_bfmul_multi<"bfmul">;
11061106
}
11071107

1108-
let Uses = [FPMR, FPCR] in {
11091108
let Predicates = [HasSME_MOP4, HasSMEF8F32] in {
1110-
defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">;
1111-
}
1109+
defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za32">;
11121110
}
11131111

11141112
let Predicates = [HasSME_MOP4, HasSMEB16B16] in {

0 commit comments

Comments
 (0)