
Commit 486649e

ghehglanza authored and committed
[CIR][CIRGen][Builtin][Neon] Lower neon_vtst_v and neon_vtstq_v (#1013)
In addition, this PR enables ZeroAttr for vector types so that CIR can generate a vector constant initialized with all zeros.
1 parent 4dd6028 commit 486649e
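
For context, the vtst family does a per-lane bit test: a result lane is all ones when the bitwise AND of the corresponding input lanes is nonzero, and all zeros otherwise. A minimal scalar sketch of that semantics (illustrative only; the helper name is made up and this is not code from the commit):

#include <stdint.h>

/* Reference model for one vtst_u8-style vector: each output byte is
   0xFF when (a[i] & b[i]) has any bit set, 0x00 otherwise. */
static void vtst_ref_u8x8(const uint8_t a[8], const uint8_t b[8],
                          uint8_t out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = (a[i] & b[i]) != 0 ? 0xFF : 0x00;
}

This and-then-compare-against-zero shape is what the new CIR lowering below emits, which is also why the commit needs ZeroAttr support for vector types: the comparison operand is an all-zero vector constant.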

File tree

4 files changed: +310 -2 lines changed

clang/lib/CIR/CodeGen/CIRGenBuilder.h

+2 -1
@@ -603,7 +603,8 @@ class CIRGenBuilderTy : public CIRBaseBuilderTy {
   mlir::cir::ConstantOp getZero(mlir::Location loc, mlir::Type ty) {
     // TODO: dispatch creation for primitive types.
     assert((mlir::isa<mlir::cir::StructType>(ty) ||
-            mlir::isa<mlir::cir::ArrayType>(ty)) &&
+            mlir::isa<mlir::cir::ArrayType>(ty) ||
+            mlir::isa<mlir::cir::VectorType>(ty)) &&
            "NYI for other types");
     return create<mlir::cir::ConstantOp>(loc, ty, getZeroAttr(ty));
   }

clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

+11
@@ -2377,6 +2377,17 @@ mlir::Value CIRGenFunction::buildCommonNeonBuiltinExpr(
     ops[0] = builder.createIntCast(ops[0], vTy);
     return buildCommonNeonShift(builder, loc, vTy, ops[0], ops[1], true);
   }
+  case NEON::BI__builtin_neon_vtst_v:
+  case NEON::BI__builtin_neon_vtstq_v: {
+    mlir::Location loc = getLoc(e->getExprLoc());
+    ops[0] = builder.createBitcast(ops[0], ty);
+    ops[1] = builder.createBitcast(ops[1], ty);
+    ops[0] = builder.createAnd(ops[0], ops[1]);
+    // Note that during LLVM Lowering, result of `VecCmpOp` is sign extended,
+    // matching traditional codegen behavior.
+    return builder.create<mlir::cir::VecCmpOp>(
+        loc, ty, mlir::cir::CmpOpKind::ne, ops[0], builder.getZero(loc, ty));
+  }
   }
 
   // This second switch is for the intrinsics that might have a more generic
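
As the note in the added case above says, the `ne` vector compare yields one i1 per lane and the LLVM lowering sign-extends it, which is what produces the 0x00/0xFF lanes checked in the tests below. A tiny standalone sketch of that widening step (an assumed scalar model, not code from this commit):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint8_t a = 0x40, b = 0xC0;
  int bit = (a & b) != 0;           /* the per-lane i1 from the ne compare */
  int8_t lane = (int8_t)-bit;       /* sext i1 -> i8: 0 stays 0x00, 1 becomes 0xFF */
  printf("%02x\n", (uint8_t)lane);  /* prints "ff" */
  return 0;
}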

clang/lib/CIR/Dialect/IR/CIRDialect.cpp

+1 -1
@@ -388,7 +388,7 @@ static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType,
 
   if (isa<mlir::cir::ZeroAttr>(attrType)) {
     if (::mlir::isa<::mlir::cir::StructType, ::mlir::cir::ArrayType,
-                    ::mlir::cir::ComplexType>(opType))
+                    ::mlir::cir::ComplexType, ::mlir::cir::VectorType>(opType))
       return success();
     return op->emitOpError("zero expects struct or array type");
   }

clang/test/CIR/CodeGen/AArch64/neon-misc.c

+296
@@ -492,3 +492,299 @@ uint32x2_t test_vqmovun_s64(int64x2_t a) {
   // LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
   // LLVM: ret <2 x i32> [[VQMOVUN_V1_I]]
 }
+
+uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
+  return vtst_s8(v1, v2);
+
+  // CIR-LABEL: vtst_s8
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u8i x 8>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u8i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 8>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vtst_s8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+  // LLVM: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+  // LLVM: ret <8 x i8> [[VTST_I]]
+}
+
+uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
+  return vtst_u8(v1, v2);
+
+  // CIR-LABEL: vtst_u8
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u8i x 8>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u8i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 8>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vtst_u8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+  // LLVM: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+  // LLVM: ret <8 x i8> [[VTST_I]]
+}
+
+uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
+  return vtst_s16(v1, v2);
+
+  // CIR-LABEL: vtst_s16
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u16i x 4>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vtst_s16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+  // LLVM: [[TMP2:%.*]] = and <4 x i16> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+  // LLVM: ret <4 x i16> [[VTST_I]]
+}
+
+uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
+  return vtst_u16(v1, v2);
+
+  // CIR-LABEL: vtst_u16
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u16i x 4>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vtst_u16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+  // LLVM: [[TMP2:%.*]] = and <4 x i16> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+  // LLVM: ret <4 x i16> [[VTST_I]]
+}
+
+uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
+  return vtst_s32(v1, v2);
+
+  // CIR-LABEL: vtst_s32
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u32i x 2>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vtst_s32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+  // LLVM: [[TMP2:%.*]] = and <2 x i32> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+  // LLVM: ret <2 x i32> [[VTST_I]]
+}
+
+uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
+  return vtst_u32(v1, v2);
+
+  // CIR-LABEL: vtst_u32
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u32i x 2>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vtst_u32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+  // LLVM: [[TMP2:%.*]] = and <2 x i32> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+  // LLVM: ret <2 x i32> [[VTST_I]]
+}
+
+uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
+  return vtst_s64(a, b);
+
+  // CIR-LABEL: vtst_s64
+  // CIR: [[A:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+  // CIR: [[B:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[A]], [[B]]) : !cir.vector<!u64i x 1>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 1>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vtst_s64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+  // LLVM: [[TMP2:%.*]] = and <1 x i64> [[A]], [[B]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+  // LLVM: ret <1 x i64> [[VTST_I]]
+}
+
+uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
+  return vtst_u64(a, b);
+
+  // CIR-LABEL: vtst_u64
+  // CIR: [[A:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+  // CIR: [[B:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[A]], [[B]]) : !cir.vector<!u64i x 1>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 1>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vtst_u64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+  // LLVM: [[TMP2:%.*]] = and <1 x i64> [[A]], [[B]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+  // LLVM: ret <1 x i64> [[VTST_I]]
+}
+
+uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
+  return vtstq_s8(v1, v2);
+
+  // CIR-LABEL: vtstq_s8
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u8i x 16>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u8i x 16>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 16>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 16>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vtstq_s8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <16 x i8> [[V1]], [[V2]]
+  // LLVM: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+  // LLVM: ret <16 x i8> [[VTST_I]]
+}
+
+uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
+  return vtstq_u8(v1, v2);
+
+  // CIR-LABEL: vtstq_u8
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u8i x 16>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u8i x 16>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 16>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 16>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vtstq_u8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <16 x i8> [[V1]], [[V2]]
+  // LLVM: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+  // LLVM: ret <16 x i8> [[VTST_I]]
+}
+
+uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
+  return vtstq_s16(v1, v2);
+
+  // CIR-LABEL: vtstq_s16
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vtstq_s16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+  // LLVM: [[TMP2:%.*]] = and <8 x i16> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+  // LLVM: ret <8 x i16> [[VTST_I]]
+}
+
+uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
+  return vtstq_u16(v1, v2);
+
+  // CIR-LABEL: vtstq_u16
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vtstq_u16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+  // LLVM: [[TMP2:%.*]] = and <8 x i16> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+  // LLVM: ret <8 x i16> [[VTST_I]]
+}
+
+uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
+  return vtstq_s32(v1, v2);
+
+  // CIR-LABEL: vtstq_s32
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vtstq_s32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+  // LLVM: [[TMP2:%.*]] = and <4 x i32> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+  // LLVM: ret <4 x i32> [[VTST_I]]
+}
+
+uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
+  return vtstq_u32(v1, v2);
+
+  // CIR-LABEL: vtstq_u32
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vtstq_u32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <4 x i32> [[V2]] to <16 x i8>
+  // LLVM: [[TMP2:%.*]] = and <4 x i32> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+  // LLVM: ret <4 x i32> [[VTST_I]]
+}
+
+uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
+  return vtstq_s64(v1, v2);
+
+  // CIR-LABEL: vtstq_s64
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vtstq_s64(<2 x i64>{{.*}}[[V1:%.*]], <2 x i64>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[V2]] to <16 x i8>
+  // LLVM: [[TMP2:%.*]] = and <2 x i64> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+  // LLVM: ret <2 x i64> [[VTST_I]]
+}
+
+uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
+  return vtstq_u64(v1, v2);
+
+  // CIR-LABEL: vtstq_u64
+  // CIR: [[V1:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+  // CIR: [[V2:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vtstq_u64(<2 x i64>{{.*}}[[V1:%.*]], <2 x i64>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V1]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <2 x i64> [[V2]] to <16 x i8>
+  // LLVM: [[TMP2:%.*]] = and <2 x i64> [[V1]], [[V2]]
+  // LLVM: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+  // LLVM: ret <2 x i64> [[VTST_I]]
+}
