Skip to content

Commit f2760f6

Browse files
committed
Lower vld1_dup and vld1q_dup
1 parent 9975749 commit f2760f6

File tree

4 files changed

+248
-7
lines changed

4 files changed

+248
-7
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2821,13 +2821,16 @@ def VecSplatOp : CIR_Op<"vec.splat", [Pure,
28212821
let description = [{
28222822
The `cir.vec.splat` operation creates a vector value from a scalar value.
28232823
All elements of the vector have the same value, that of the given scalar.
2824+
If `poison` is specified, the start value of the result is poison values
2825+
instead of undef.
28242826
}];
28252827

2826-
let arguments = (ins CIR_AnyType:$value);
2828+
let arguments = (ins CIR_AnyType:$value, UnitAttr:$poison);
28272829
let results = (outs CIR_VectorType:$result);
28282830

28292831
let assemblyFormat = [{
2830-
$value `:` type($value) `,` qualified(type($result)) attr-dict
2832+
$value `:` type($value) `,` qualified(type($result))
2833+
(`poison` $poison^)? attr-dict
28312834
}];
28322835
let hasVerifier = 0;
28332836
}

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,7 +1949,9 @@ static mlir::cir::VectorType GetNeonType(CIRGenFunction *CGF,
19491949
CGF->getCIRGenModule().FloatTy,
19501950
V1Ty ? 1 : (2 << IsQuad));
19511951
case NeonTypeFlags::Float64:
1952-
llvm_unreachable("NYI");
1952+
return mlir::cir::VectorType::get(CGF->getBuilder().getContext(),
1953+
CGF->getCIRGenModule().DoubleTy,
1954+
V1Ty ? 1 : (1 << IsQuad));
19531955
}
19541956
llvm_unreachable("Unknown vector element type!");
19551957
}
@@ -3411,7 +3413,12 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
34113413
}
34123414
case NEON::BI__builtin_neon_vld1_dup_v:
34133415
case NEON::BI__builtin_neon_vld1q_dup_v: {
3414-
llvm_unreachable("NYI");
3416+
cir::Address ptrAddr = PtrOp0.withElementType(VTy.getEltType());
3417+
mlir::Value val = builder.createLoad(getLoc(E->getExprLoc()), ptrAddr);
3418+
mlir::cir::VecSplatOp vecSplat = builder.create<mlir::cir::VecSplatOp>(
3419+
getLoc(E->getExprLoc()), VTy, val);
3420+
vecSplat.setPoison(true);
3421+
return vecSplat;
34153422
}
34163423
case NEON::BI__builtin_neon_vst1_lane_v:
34173424
case NEON::BI__builtin_neon_vst1q_lane_v:

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,15 +1435,18 @@ class CIRVectorSplatLowering
14351435
assert(vecTy && "result type of cir.vec.splat op is not VectorType");
14361436
auto llvmTy = typeConverter->convertType(vecTy);
14371437
auto loc = op.getLoc();
1438-
mlir::Value undef = rewriter.create<mlir::LLVM::UndefOp>(loc, llvmTy);
1438+
mlir::Value startVal =
1439+
op.getPoison()
1440+
? rewriter.create<mlir::LLVM::PoisonOp>(loc, llvmTy).getRes()
1441+
: rewriter.create<mlir::LLVM::UndefOp>(loc, llvmTy);
14391442
mlir::Value indexValue =
14401443
rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(), 0);
14411444
mlir::Value elementValue = adaptor.getValue();
14421445
mlir::Value oneElement = rewriter.create<mlir::LLVM::InsertElementOp>(
1443-
loc, undef, elementValue, indexValue);
1446+
loc, startVal, elementValue, indexValue);
14441447
SmallVector<int32_t> zeroValues(vecTy.getSize(), 0);
14451448
mlir::Value shuffled = rewriter.create<mlir::LLVM::ShuffleVectorOp>(
1446-
loc, oneElement, undef, zeroValues);
1449+
loc, oneElement, startVal, zeroValues);
14471450
rewriter.replaceOp(op, shuffled);
14481451
return mlir::success();
14491452
}

clang/test/CIR/CodeGen/neon-tmp.c

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2+
// RUN: -fclangir -disable-O0-optnone \
3+
// RUN: -flax-vector-conversions=none -emit-cir -o %t.cir %s
4+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
5+
6+
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
7+
// RUN: -fclangir -disable-O0-optnone \
8+
// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \
9+
// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
10+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
11+
12+
// REQUIRES: aarch64-registered-target || arm-registered-target
13+
14+
// This test mimics clang/test/CodeGen/aarch64-neon-intrinsics.c, which eventually
15+
// CIR shall be able to support fully. Since this is going to take some time to converge,
16+
// the unsupported/NYI code is commented out, so that we can incrementally improve this.
17+
// The NYI filecheck used contains the LLVM output from OG codegen that should guide the
18+
// correct result when implementing this into the CIR pipeline.
19+
20+
#include <arm_neon.h>
21+
22+
uint8x8_t test_vld1_dup_u8(uint8_t const * ptr) {
23+
return vld1_dup_u8(ptr);
24+
}
25+
26+
// CIR-LABEL: vld1_dup_u8
27+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u8i>, !u8i
28+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 8> poison
29+
30+
// LLVM: {{.*}}test_vld1_dup_u8(ptr{{.*}}[[PTR:%.*]])
31+
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
32+
// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
33+
// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer
34+
35+
int8x8_t test_vld1_dup_s8(int8_t const * ptr) {
36+
return vld1_dup_s8(ptr);
37+
}
38+
39+
// CIR-LABEL: test_vld1_dup_s8
40+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s8i>, !s8i
41+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 8> poison
42+
43+
// LLVM: {{.*}}test_vld1_dup_s8(ptr{{.*}}[[PTR:%.*]])
44+
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
45+
// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
46+
// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer
47+
48+
uint16x4_t test_vld1_dup_u16(uint16_t const * ptr) {
49+
return vld1_dup_u16(ptr);
50+
}
51+
52+
// CIR-LABEL: test_vld1_dup_u16
53+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u16i>, !u16i
54+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 4> poison
55+
56+
// LLVM: {{.*}}test_vld1_dup_u16(ptr{{.*}}[[PTR:%.*]])
57+
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
58+
// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
59+
// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer
60+
61+
int16x4_t test_vld1_dup_s16(int16_t const * ptr) {
62+
return vld1_dup_s16(ptr);
63+
}
64+
65+
// CIR-LABEL: test_vld1_dup_s16
66+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s16i>, !s16i
67+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 4> poison
68+
69+
// LLVM: {{.*}}test_vld1_dup_s16(ptr{{.*}}[[PTR:%.*]])
70+
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
71+
// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
72+
// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer
73+
74+
int32x2_t test_vld1_dup_s32(int32_t const * ptr) {
75+
return vld1_dup_s32(ptr);
76+
}
77+
78+
// CIR-LABEL: test_vld1_dup_s32
79+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s32i>, !s32i
80+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 2> poison
81+
82+
// LLVM: {{.*}}test_vld1_dup_s32(ptr{{.*}}[[PTR:%.*]])
83+
// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
84+
// LLVM: [[VEC:%.*]] = insertelement <2 x i32> poison, i32 [[VAL]], i64 0
85+
// LLVM: {{%.*}} = shufflevector <2 x i32> [[VEC]], <2 x i32> poison, <2 x i32> zeroinitializer
86+
87+
int64x1_t test_vld1_dup_s64(int64_t const * ptr) {
88+
return vld1_dup_s64(ptr);
89+
}
90+
91+
// CIR-LABEL: test_vld1_dup_s64
92+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s64i>, !s64i
93+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 1> poison
94+
95+
// LLVM: {{.*}}test_vld1_dup_s64(ptr{{.*}}[[PTR:%.*]])
96+
// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
97+
// LLVM: [[VEC:%.*]] = insertelement <1 x i64> poison, i64 [[VAL]], i64 0
98+
// LLVM: {{%.*}} = shufflevector <1 x i64> [[VEC]], <1 x i64> poison, <1 x i32> zeroinitializer
99+
100+
float32x2_t test_vld1_dup_f32(float32_t const * ptr) {
101+
return vld1_dup_f32(ptr);
102+
}
103+
104+
// CIR-LABEL: test_vld1_dup_f32
105+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.float>, !cir.float
106+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 2> poison
107+
108+
// LLVM: {{.*}}test_vld1_dup_f32(ptr{{.*}}[[PTR:%.*]])
109+
// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
110+
// LLVM: [[VEC:%.*]] = insertelement <2 x float> poison, float [[VAL]], i64 0
111+
// LLVM: {{%.*}} = shufflevector <2 x float> [[VEC]], <2 x float> poison, <2 x i32> zeroinitializer
112+
113+
float64x1_t test_vld1_dup_f64(float64_t const * ptr) {
114+
return vld1_dup_f64(ptr);
115+
}
116+
117+
// CIR-LABEL: test_vld1_dup_f64
118+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.double>, !cir.double
119+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 1> poison
120+
121+
// LLVM: {{.*}}test_vld1_dup_f64(ptr{{.*}}[[PTR:%.*]])
122+
// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
123+
// LLVM: [[VEC:%.*]] = insertelement <1 x double> poison, double [[VAL]], i64 0
124+
// LLVM: {{%.*}} = shufflevector <1 x double> [[VEC]], <1 x double> poison, <1 x i32> zeroinitializer
125+
126+
uint8x16_t test_vld1q_dup_u8(uint8_t const * ptr) {
127+
return vld1q_dup_u8(ptr);
128+
}
129+
130+
// CIR-LABEL: test_vld1q_dup_u8
131+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u8i>, !u8i
132+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 16> poison
133+
134+
// LLVM: {{.*}}test_vld1q_dup_u8(ptr{{.*}}[[PTR:%.*]])
135+
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
136+
// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
137+
// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer
138+
139+
int8x16_t test_vld1q_dup_s8(int8_t const * ptr) {
140+
return vld1q_dup_s8(ptr);
141+
}
142+
143+
// CIR-LABEL: test_vld1q_dup_s8
144+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s8i>, !s8i
145+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 16> poison
146+
147+
// LLVM: {{.*}}test_vld1q_dup_s8(ptr{{.*}}[[PTR:%.*]])
148+
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
149+
// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
150+
// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer
151+
152+
uint16x8_t test_vld1q_dup_u16(uint16_t const * ptr) {
153+
return vld1q_dup_u16(ptr);
154+
}
155+
156+
// CIR-LABEL: test_vld1q_dup_u16
157+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u16i>, !u16i
158+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 8> poison
159+
160+
// LLVM: {{.*}}test_vld1q_dup_u16(ptr{{.*}}[[PTR:%.*]])
161+
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
162+
// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
163+
// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer
164+
165+
int16x8_t test_vld1q_dup_s16(int16_t const * ptr) {
166+
return vld1q_dup_s16(ptr);
167+
}
168+
169+
// CIR-LABEL: test_vld1q_dup_s16
170+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s16i>, !s16i
171+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 8> poison
172+
173+
// LLVM: {{.*}}test_vld1q_dup_s16(ptr{{.*}}[[PTR:%.*]])
174+
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
175+
// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
176+
// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer
177+
178+
int32x4_t test_vld1q_dup_s32(int32_t const * ptr) {
179+
return vld1q_dup_s32(ptr);
180+
}
181+
182+
// CIR-LABEL: test_vld1q_dup_s32
183+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s32i>, !s32i
184+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 4> poison
185+
186+
// LLVM: {{.*}}test_vld1q_dup_s32(ptr{{.*}}[[PTR:%.*]])
187+
// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
188+
// LLVM: [[VEC:%.*]] = insertelement <4 x i32> poison, i32 [[VAL]], i64 0
189+
// LLVM: {{%.*}} = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <4 x i32> zeroinitializer
190+
191+
int64x2_t test_vld1q_dup_s64(int64_t const * ptr) {
192+
return vld1q_dup_s64(ptr);
193+
}
194+
195+
// CIR-LABEL: test_vld1q_dup_s64
196+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s64i>, !s64i
197+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 2> poison
198+
199+
// LLVM: {{.*}}test_vld1q_dup_s64(ptr{{.*}}[[PTR:%.*]])
200+
// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
201+
// LLVM: [[VEC:%.*]] = insertelement <2 x i64> poison, i64 [[VAL]], i64 0
202+
// LLVM: {{%.*}} = shufflevector <2 x i64> [[VEC]], <2 x i64> poison, <2 x i32> zeroinitializer
203+
204+
float32x4_t test_vld1q_dup_f32(float32_t const * ptr) {
205+
return vld1q_dup_f32(ptr);
206+
}
207+
208+
// CIR-LABEL: test_vld1q_dup_f32
209+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.float>, !cir.float
210+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 4> poison
211+
212+
// LLVM: {{.*}}test_vld1q_dup_f32(ptr{{.*}}[[PTR:%.*]])
213+
// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
214+
// LLVM: [[VEC:%.*]] = insertelement <4 x float> poison, float [[VAL]], i64 0
215+
// LLVM: {{%.*}} = shufflevector <4 x float> [[VEC]], <4 x float> poison, <4 x i32> zeroinitializer
216+
217+
float64x2_t test_vld1q_dup_f64(float64_t const * ptr) {
218+
return vld1q_dup_f64(ptr);
219+
}
220+
221+
// CIR-LABEL: test_vld1q_dup_f64
222+
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.double>, !cir.double
223+
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 2> poison
224+
225+
// LLVM: {{.*}}test_vld1q_dup_f64(ptr{{.*}}[[PTR:%.*]])
226+
// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
227+
// LLVM: [[VEC:%.*]] = insertelement <2 x double> poison, double [[VAL]], i64 0
228+
// LLVM: {{%.*}} = shufflevector <2 x double> [[VEC]], <2 x double> poison, <2 x i32> zeroinitializer

0 commit comments

Comments
 (0)