
Commit 3df66bf

ghehg authored and smeenai committed
[CIR][CIRGen] Generate CIR for neon_vget and neon_vdup lane intrinsics (llvm#884)
As titled. This PR organizes its test cases similarly to [PR882](llvm#882). Note that, compared to OG, this PR combines the cases for some pairs of intrinsics, such as BI__builtin_neon_vget_lane_f32 and BI__builtin_neon_vdups_lane_f32, which generate the same code in both OG and CIRGen. OG separates them into different case handlers because it passes mnemonics, which differ between the two intrinsics; CIRGen does not pass mnemonics, so the cases can be combined.
Co-authored-by: Guojin He <[email protected]>
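For illustration only (not part of this diff), a minimal C sketch of one such pair: both intrinsics read a single lane of a 2 x float vector, so one lane-extract case in the builtin switch can serve both.

#include <arm_neon.h>

// Both functions compile to the same lane extract. OG distinguishes the two
// builtins only by the mnemonic string it threads through; CIRGen does not
// need that string, so the two cases can share one handler.
float32_t use_vget(float32x2_t v) { return vget_lane_f32(v, 1); }
float32_t use_vdup(float32x2_t v) { return vdups_lane_f32(v, 1); }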
1 parent b42dcea commit 3df66bf

File tree

3 files changed: +485 −16 lines

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+50 −16)
@@ -2186,42 +2186,76 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
 
   case NEON::BI__builtin_neon_vget_lane_i8:
   case NEON::BI__builtin_neon_vdupb_lane_i8:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt8Ty, 8));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i8:
   case NEON::BI__builtin_neon_vdupb_laneq_i8:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt8Ty, 16));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vget_lane_i16:
   case NEON::BI__builtin_neon_vduph_lane_i16:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt16Ty, 4));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i16:
   case NEON::BI__builtin_neon_vduph_laneq_i16:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt16Ty, 8));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vget_lane_i32:
   case NEON::BI__builtin_neon_vdups_lane_i32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt32Ty, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
+  case NEON::BI__builtin_neon_vget_lane_f32:
   case NEON::BI__builtin_neon_vdups_lane_f32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), FloatTy, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i32:
   case NEON::BI__builtin_neon_vdups_laneq_i32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt32Ty, 4));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vget_lane_i64:
   case NEON::BI__builtin_neon_vdupd_lane_i64:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt64Ty, 1));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vdupd_lane_f64:
-    llvm_unreachable("NYI");
+  case NEON::BI__builtin_neon_vget_lane_f64:
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), DoubleTy, 1));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i64:
   case NEON::BI__builtin_neon_vdupd_laneq_i64:
-    llvm_unreachable("NYI");
-  case NEON::BI__builtin_neon_vget_lane_f32:
-    llvm_unreachable("NYI");
-  case NEON::BI__builtin_neon_vget_lane_f64:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt64Ty, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_f32:
   case NEON::BI__builtin_neon_vdups_laneq_f32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), FloatTy, 4));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_f64:
   case NEON::BI__builtin_neon_vdupd_laneq_f64:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), DoubleTy, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vaddh_f16:
     llvm_unreachable("NYI");
   case NEON::BI__builtin_neon_vsubh_f16:
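Each of these cases follows one pattern: bitcast the first operand to the matching CIR vector type, then emit a cir.vec.extract with the lane index. For the f32 pair above, the emitted CIR looks roughly like this (shape taken from the CHECK lines in the new test file below):

%idx = cir.const #cir.int<1> : !s32i
%res = cir.vec.extract %v[%idx : !s32i] : !cir.vector<!cir.float x 2>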
New test file (+216)

@@ -0,0 +1,216 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:  -emit-cir -target-feature +neon %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:  -emit-llvm -target-feature +neon %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// Testing normal situations of vdup lane intrinsics.

// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>

int8_t test_vdupb_lane_s8(int8x8_t src) {
  return vdupb_lane_s8(src, 7);
}

// CIR-LABEL: test_vdupb_lane_s8
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>

// LLVM: define dso_local i8 @test_vdupb_lane_s8(<8 x i8> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <8 x i8> [[INTRN_ARG]], i32 7
// LLVM: ret i8 {{%.*}}

int8_t test_vdupb_laneq_s8(int8x16_t a) {
  return vdupb_laneq_s8(a, 15);
}

// CIR-LABEL: test_vdupb_laneq_s8
// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>

// LLVM: define dso_local i8 @test_vdupb_laneq_s8(<16 x i8> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15
// LLVM: ret i8 {{%.*}}

int16_t test_vduph_lane_s16(int16x4_t src) {
  return vduph_lane_s16(src, 3);
}

// CIR-LABEL: test_vduph_lane_s16
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>

// LLVM: define dso_local i16 @test_vduph_lane_s16(<4 x i16> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3
// LLVM: ret i16 {{%.*}}

int16_t test_vduph_laneq_s16(int16x8_t a) {
  return vduph_laneq_s16(a, 7);
}

// CIR-LABEL: test_vduph_laneq_s16
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>

// LLVM: define dso_local i16 @test_vduph_laneq_s16(<8 x i16> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7
// LLVM: ret i16 {{%.*}}

int32_t test_vdups_lane_s32(int32x2_t a) {
  return vdups_lane_s32(a, 1);
}

// CIR-LABEL: test_vdups_lane_s32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>

// LLVM: define dso_local i32 @test_vdups_lane_s32(<2 x i32> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <2 x i32> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1
// LLVM: ret i32 {{%.*}}

int32_t test_vdups_laneq_s32(int32x4_t a) {
  return vdups_laneq_s32(a, 3);
}

// CIR-LABEL: test_vdups_laneq_s32
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>

// LLVM: define dso_local i32 @test_vdups_laneq_s32(<4 x i32> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3
// LLVM: ret i32 {{%.*}}

int64_t test_vdupd_lane_s64(int64x1_t src) {
  return vdupd_lane_s64(src, 0);
}

// CIR-LABEL: test_vdupd_lane_s64
// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>

// LLVM: define dso_local i64 @test_vdupd_lane_s64(<1 x i64> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0
// LLVM: ret i64 {{%.*}}

int64_t test_vdupd_laneq_s64(int64x2_t a) {
  return vdupd_laneq_s64(a, 1);
}

// CIR-LABEL: test_vdupd_laneq_s64
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>

// LLVM: define dso_local i64 @test_vdupd_laneq_s64(<2 x i64> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1
// LLVM: ret i64 {{%.*}}

float32_t test_vdups_lane_f32(float32x2_t src) {
  return vdups_lane_f32(src, 1);
}

// CIR-LABEL: test_vdups_lane_f32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local float @test_vdups_lane_f32(<2 x float> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1
// LLVM: ret float {{%.*}}

float64_t test_vdupd_lane_f64(float64x1_t src) {
  return vdupd_lane_f64(src, 0);
}

// CIR-LABEL: test_vdupd_lane_f64
// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>

// LLVM: define dso_local double @test_vdupd_lane_f64(<1 x double> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0
// LLVM: ret double {{%.*}}

float32_t test_vdups_laneq_f32(float32x4_t src) {
  return vdups_laneq_f32(src, 3);
}

// CIR-LABEL: test_vdups_laneq_f32
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>

// LLVM: define dso_local float @test_vdups_laneq_f32(<4 x float> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3
// LLVM: ret float {{%.*}}

float64_t test_vdupd_laneq_f64(float64x2_t src) {
  return vdupd_laneq_f64(src, 1);
}

// CIR-LABEL: test_vdupd_laneq_f64
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>

// LLVM: define dso_local double @test_vdupd_laneq_f64(<2 x double> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1
// LLVM: ret double {{%.*}}
