Skip to content

[CIR][CIRGen][Builtin][Neon] Lower vld1_dup and vld1q_dup #936

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3451,7 +3451,11 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
}
case NEON::BI__builtin_neon_vld1_dup_v:
case NEON::BI__builtin_neon_vld1q_dup_v: {
llvm_unreachable("NYI");
cir::Address ptrAddr = PtrOp0.withElementType(vTy.getEltType());
mlir::Value val = builder.createLoad(getLoc(E->getExprLoc()), ptrAddr);
mlir::cir::VecSplatOp vecSplat = builder.create<mlir::cir::VecSplatOp>(
getLoc(E->getExprLoc()), vTy, val);
return vecSplat;
}
case NEON::BI__builtin_neon_vst1_lane_v:
case NEON::BI__builtin_neon_vst1q_lane_v: {
Expand Down
208 changes: 208 additions & 0 deletions clang/test/CIR/CodeGen/AArch64/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -17577,3 +17577,211 @@ int32x2_t test_vmovn_s64(int64x2_t a) {
// LLVM: [[VMOVN_I:%.*]] = trunc <2 x i64> [[A]] to <2 x i32>
// LLVM: ret <2 x i32> [[VMOVN_I]]
}

uint8x8_t test_vld1_dup_u8(uint8_t const * ptr) {
return vld1_dup_u8(ptr);
}

// CIR-LABEL: vld1_dup_u8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u8i>, !u8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 8>

// LLVM: {{.*}}test_vld1_dup_u8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer

int8x8_t test_vld1_dup_s8(int8_t const * ptr) {
return vld1_dup_s8(ptr);
}

// CIR-LABEL: test_vld1_dup_s8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s8i>, !s8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 8>

// LLVM: {{.*}}test_vld1_dup_s8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer

uint16x4_t test_vld1_dup_u16(uint16_t const * ptr) {
return vld1_dup_u16(ptr);
}

// CIR-LABEL: test_vld1_dup_u16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u16i>, !u16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 4>

// LLVM: {{.*}}test_vld1_dup_u16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer

int16x4_t test_vld1_dup_s16(int16_t const * ptr) {
return vld1_dup_s16(ptr);
}

// CIR-LABEL: test_vld1_dup_s16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s16i>, !s16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 4>

// LLVM: {{.*}}test_vld1_dup_s16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer

int32x2_t test_vld1_dup_s32(int32_t const * ptr) {
return vld1_dup_s32(ptr);
}

// CIR-LABEL: test_vld1_dup_s32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s32i>, !s32i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 2>

// LLVM: {{.*}}test_vld1_dup_s32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <2 x i32> poison, i32 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x i32> [[VEC]], <2 x i32> poison, <2 x i32> zeroinitializer

int64x1_t test_vld1_dup_s64(int64_t const * ptr) {
return vld1_dup_s64(ptr);
}

// CIR-LABEL: test_vld1_dup_s64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s64i>, !s64i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 1>

// LLVM: {{.*}}test_vld1_dup_s64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <1 x i64> poison, i64 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <1 x i64> [[VEC]], <1 x i64> poison, <1 x i32> zeroinitializer

float32x2_t test_vld1_dup_f32(float32_t const * ptr) {
return vld1_dup_f32(ptr);
}

// CIR-LABEL: test_vld1_dup_f32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.float>, !cir.float
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 2>

// LLVM: {{.*}}test_vld1_dup_f32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <2 x float> poison, float [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x float> [[VEC]], <2 x float> poison, <2 x i32> zeroinitializer

float64x1_t test_vld1_dup_f64(float64_t const * ptr) {
return vld1_dup_f64(ptr);
}

// CIR-LABEL: test_vld1_dup_f64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.double>, !cir.double
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 1>

// LLVM: {{.*}}test_vld1_dup_f64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <1 x double> poison, double [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <1 x double> [[VEC]], <1 x double> poison, <1 x i32> zeroinitializer

uint8x16_t test_vld1q_dup_u8(uint8_t const * ptr) {
return vld1q_dup_u8(ptr);
}

// CIR-LABEL: test_vld1q_dup_u8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u8i>, !u8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 16>

// LLVM: {{.*}}test_vld1q_dup_u8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer

int8x16_t test_vld1q_dup_s8(int8_t const * ptr) {
return vld1q_dup_s8(ptr);
}

// CIR-LABEL: test_vld1q_dup_s8
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s8i>, !s8i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 16>

// LLVM: {{.*}}test_vld1q_dup_s8(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer

uint16x8_t test_vld1q_dup_u16(uint16_t const * ptr) {
return vld1q_dup_u16(ptr);
}

// CIR-LABEL: test_vld1q_dup_u16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!u16i>, !u16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 8>

// LLVM: {{.*}}test_vld1q_dup_u16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer

int16x8_t test_vld1q_dup_s16(int16_t const * ptr) {
return vld1q_dup_s16(ptr);
}

// CIR-LABEL: test_vld1q_dup_s16
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s16i>, !s16i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 8>

// LLVM: {{.*}}test_vld1q_dup_s16(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer

int32x4_t test_vld1q_dup_s32(int32_t const * ptr) {
return vld1q_dup_s32(ptr);
}

// CIR-LABEL: test_vld1q_dup_s32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s32i>, !s32i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 4>

// LLVM: {{.*}}test_vld1q_dup_s32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <4 x i32> poison, i32 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <4 x i32> zeroinitializer

int64x2_t test_vld1q_dup_s64(int64_t const * ptr) {
return vld1q_dup_s64(ptr);
}

// CIR-LABEL: test_vld1q_dup_s64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!s64i>, !s64i
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 2>

// LLVM: {{.*}}test_vld1q_dup_s64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <2 x i64> poison, i64 [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x i64> [[VEC]], <2 x i64> poison, <2 x i32> zeroinitializer

float32x4_t test_vld1q_dup_f32(float32_t const * ptr) {
return vld1q_dup_f32(ptr);
}

// CIR-LABEL: test_vld1q_dup_f32
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.float>, !cir.float
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 4>

// LLVM: {{.*}}test_vld1q_dup_f32(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
// LLVM: [[VEC:%.*]] = insertelement <4 x float> poison, float [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <4 x float> [[VEC]], <4 x float> poison, <4 x i32> zeroinitializer

float64x2_t test_vld1q_dup_f64(float64_t const * ptr) {
return vld1q_dup_f64(ptr);
}

// CIR-LABEL: test_vld1q_dup_f64
// CIR: [[VAL:%.*]] = cir.load {{%.*}} : !cir.ptr<!cir.double>, !cir.double
// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 2>

// LLVM: {{.*}}test_vld1q_dup_f64(ptr{{.*}}[[PTR:%.*]])
// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
// LLVM: [[VEC:%.*]] = insertelement <2 x double> poison, double [[VAL]], i64 0
// LLVM: {{%.*}} = shufflevector <2 x double> [[VEC]], <2 x double> poison, <2 x i32> zeroinitializer