[RISCV] Add lowering for @llvm.experimental.vector.compress #113291
Conversation
This intrinsic was introduced by llvm#92289, and currently we just expand it for RISC-V. This patch adds custom lowering for this intrinsic and simply maps it to the `vcompress` instruction. Fixes llvm#113242.
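For context, here is a minimal sketch of the intrinsic's semantics and the lowering this patch enables; the function name and the concrete lane values in the comments are illustrative, not taken from the patch:

define <4 x i32> @compress_example(<4 x i32> %v, <4 x i1> %mask, <4 x i32> %passthru) {
  ; Lanes of %v whose mask bit is set are packed to the front of the result;
  ; the remaining lanes are taken from %passthru. For example, with
  ; %v = <1, 2, 3, 4>, %mask = <1, 0, 1, 1>, %passthru = <9, 9, 9, 9>,
  ; the result is <1, 3, 4, 9>. With this patch the call below selects to a
  ; single vcompress.vm (tail-undisturbed when a passthru is given,
  ; tail-agnostic for an undef passthru) instead of being expanded.
  %ret = call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> %v, <4 x i1> %mask, <4 x i32> %passthru)
  ret <4 x i32> %ret
}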
@llvm/pr-subscribers-backend-risc-v
Author: Pengcheng Wang (wangpc-pp)
Changes: This intrinsic was introduced by #92289 and currently we just expand it for RISC-V. This patch adds custom lowering for this intrinsic and simply maps it to the `vcompress` instruction. Fixes #113242.
Patch is 67.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113291.diff
5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3588ef46cadce1..eddb2fbbd709c2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -930,6 +930,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Custom);
}
}
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
}
for (MVT VT : VecTupleVTs) {
@@ -1051,6 +1053,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
VT, Custom);
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
@@ -1306,6 +1310,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
Custom);
}
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1434,6 +1440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
VT, Custom);
+
+ setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
@@ -7082,6 +7090,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::MSTORE:
case ISD::VP_STORE:
return lowerMaskedStore(Op, DAG);
+ case ISD::VECTOR_COMPRESS:
+ return lowerVectorCompress(Op, DAG);
case ISD::SELECT_CC: {
// This occurs because we custom legalize SETGT and SETUGT for setcc. That
// causes LegalizeDAG to think we need to custom legalize select_cc. Expand
@@ -11225,6 +11235,36 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
+SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Val = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+
+ MVT VT = Val.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
+ }
+
+ SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ SDValue Res =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+ DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+ Passthru, Val, Mask, VL);
+
+ if (VT.isFixedLengthVector())
+ Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+
+ return Res;
+}
+
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c3749447955330..9191d9a9469b60 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -955,6 +955,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
new file mode 100644
index 00000000000000..8f1ff7ed4a11e2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> undef)
+ ret <1 x half> %ret
+}
+
+define <1 x half> @vector_compress_v1f16_passthru(<1 x half> %passthru, <1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> %passthru)
+ ret <1 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16(<2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> undef)
+ ret <2 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16_passthru(<2 x half> %passthru, <2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> %passthru)
+ ret <2 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16(<4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> undef)
+ ret <4 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16_passthru(<4 x half> %passthru, <4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> %passthru)
+ ret <4 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16(<8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> undef)
+ ret <8 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16_passthru(<8 x half> %passthru, <8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> %passthru)
+ ret <8 x half> %ret
+}
+
+define <1 x float> @vector_compress_v1f32(<1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> undef)
+ ret <1 x float> %ret
+}
+
+define <1 x float> @vector_compress_v1f32_passthru(<1 x float> %passthru, <1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> %passthru)
+ ret <1 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32(<2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> undef)
+ ret <2 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32_passthru(<2 x float> %passthru, <2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> %passthru)
+ ret <2 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)
+ ret <4 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32_passthru(<4 x float> %passthru, <4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> %passthru)
+ ret <4 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32(<8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> undef)
+ ret <8 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32_passthru(<8 x float> %passthru, <8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> %passthru)
+ ret <8 x float> %ret
+}
+
+define <1 x double> @vector_compress_v1f64(<1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> undef)
+ ret <1 x double> %ret
+}
+
+define <1 x double> @vector_compress_v1f64_passthru(<1 x double> %passthru, <1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> %passthru)
+ ret <1 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64(<2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> undef)
+ ret <2 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64_passthru(<2 x double> %passthru, <2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> %passthru)
+ ret <2 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64(<4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> undef)
+ ret <4 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64_passthru(<4 x double> %passthru, <4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> %passthru)
+ ret <4 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64(<8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vcompress.vm v12, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> undef)
+ ret <8 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64_passthru(<8 x double> %passthru, <8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v12, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> %passthru)
+ ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll
new file mode 100644
index 00000000000000..3952dc31838a2d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x i8> @vector_compress_v1i8(<1 x i8> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> undef)
+ ret <1 x i8> %ret
+}
+
+define <1 x i8> @vector_compress_v1i8_passthru(<1 x i8> %passthru, <1 x i8> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> %passthru)
+ ret <1 x i8> %ret
+}
+
+define <2 x i8> @vector_compress_v2i8(<2 x i8> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> undef)
+ ret <2 x i8> %ret
+}
+
+define <2 x i8> @vector_compress_v2i8_passthru(<2 x i8> %passthru, <2 x i8> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> %passthru)
+ ret <2 x i8> %ret
+}
+
+define <4 x i8> @vector_compress_v4i8(<4 x i8> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> undef)
+ ret <4 x i8> %ret
+}
+
+define <4 x i8> @vector_compress_v4i8_passthru(<4 x i8> %passthru, <4 x i8> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> %passthru)
+ ret <4 x i8> %ret
+}
+
+define <8 x i8> @vector_compress_v8i8(<8 x i8> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> undef)
+ ret <8 x i8> %ret
+}
+
+define <8 x i8> @vector_compress_v8i8_passthru(<8 x i8> %passthru, <8 x i8> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8i8_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> %passthru)
+ ret <8 x i8> %ret
+}
+
+define <1 x i16> @vector_compress_v1i16(<1 x i16> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> undef)
+ ret <1 x i16> %ret
+}
+
+define <1 x i16> @vector_compress_v1i16_passthru(<1 x i16> %passthru, <1 x i16> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+ %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> %passthru)
+ ret <1 x i16> %ret
+}
+
+define <2 x i16> @vector_compress_v2i16(<2 x i16> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i16:
+; CHE...
[truncated]
LGTM
Follow-up (llvm#113770): handles f16/bf16 with zvfhmin and zvfbfmin, building on llvm#113291.
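For illustration only (a sketch, not code taken from llvm#113770): once f16/bf16 vectors are legal under zvfhmin/zvfbfmin, a bf16 compress like the one below should also select to a single vcompress.vm instead of being expanded.

define <vscale x 4 x bfloat> @compress_nxv4bf16(<vscale x 4 x bfloat> %v, <vscale x 4 x i1> %mask) {
  ; Hypothetical example: vcompress only moves whole elements, so the same
  ; lowering applies to 16-bit bf16 lanes once the type is legal.
  %ret = call <vscale x 4 x bfloat> @llvm.experimental.vector.compress.nxv4bf16(<vscale x 4 x bfloat> %v, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
  ret <vscale x 4 x bfloat> %ret
}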