[RISCV] Add lowering for @llvm.experimental.vector.compress #113291


Merged
merged 1 commit into llvm:main from main-riscv-vector-compress
Oct 23, 2024

Conversation

wangpc-pp
Contributor

This intrinsic was introduced by #92289; until now we simply expanded
it for RISC-V.

This patch adds custom lowering for this intrinsic, mapping it
directly to the `vcompress` instruction.

Fixes #113242.
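
For illustration, the v4f32 case from the new tests:

  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)

now compiles to a single vcompress.vm plus register moves, rather than the previous expansion through memory:

  vsetivli zero, 4, e32, m1, ta, ma
  vcompress.vm v9, v8, v0
  vmv.v.v v8, v9
  ret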

@llvmbot
Member

llvmbot commented Oct 22, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)


Patch is 67.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113291.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+40)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+1)
  • (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll (+255)
  • (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll (+339)
  • (added) llvm/test/CodeGen/RISCV/rvv/vector-compress.ll (+794)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3588ef46cadce1..eddb2fbbd709c2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -930,6 +930,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                              VT, Custom);
         }
       }
+
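+      // Custom-lower @llvm.experimental.vector.compress to vcompress (see
+      // lowerVectorCompress).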
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     for (MVT VT : VecTupleVTs) {
@@ -1051,6 +1053,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                           ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                           ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                          VT, Custom);
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     };
 
     // Sets common extload/truncstore actions on RVV floating-point vector
@@ -1306,6 +1310,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                 {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                 Custom);
         }
+
+        setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
       }
 
       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1434,6 +1440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
              ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
              ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
             VT, Custom);
+
+        setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
       }
 
       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
@@ -7082,6 +7090,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::MSTORE:
   case ISD::VP_STORE:
     return lowerMaskedStore(Op, DAG);
+  case ISD::VECTOR_COMPRESS:
+    return lowerVectorCompress(Op, DAG);
   case ISD::SELECT_CC: {
     // This occurs because we custom legalize SETGT and SETUGT for setcc. That
     // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
@@ -11225,6 +11235,36 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
 }
 
+SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  SDValue Mask = Op.getOperand(1);
+  SDValue Passthru = Op.getOperand(2);
+
+  MVT VT = Val.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT ContainerVT = VT;
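+  // RVV instructions only operate on scalable types, so handle fixed-length
+  // vectors by converting the operands into their scalable container type.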
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    MVT MaskVT = getMaskTypeFor(ContainerVT);
+    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+    Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
+  }
+
+  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
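+  // Build the riscv_vcompress intrinsic (passthru, src, mask, vl); a defined
+  // passthru yields the tail-undisturbed (tu) policy seen in the tests.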
+  SDValue Res =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+                  DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+                  Passthru, Val, Mask, VL);
+
+  if (VT.isFixedLengthVector())
+    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+
+  return Res;
+}
+
 SDValue
 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                       SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c3749447955330..9191d9a9469b60 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -955,6 +955,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                                SelectionDAG &DAG) const;
   SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
new file mode 100644
index 00000000000000..8f1ff7ed4a11e2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> undef)
+  ret <1 x half> %ret
+}
+
+define <1 x half> @vector_compress_v1f16_passthru(<1 x half> %passthru, <1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> %passthru)
+  ret <1 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16(<2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> undef)
+  ret <2 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16_passthru(<2 x half> %passthru, <2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> %passthru)
+  ret <2 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16(<4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> undef)
+  ret <4 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16_passthru(<4 x half> %passthru, <4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> %passthru)
+  ret <4 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16(<8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> undef)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16_passthru(<8 x half> %passthru, <8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> %passthru)
+  ret <8 x half> %ret
+}
+
+define <1 x float> @vector_compress_v1f32(<1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> undef)
+  ret <1 x float> %ret
+}
+
+define <1 x float> @vector_compress_v1f32_passthru(<1 x float> %passthru, <1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> %passthru)
+  ret <1 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32(<2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> undef)
+  ret <2 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32_passthru(<2 x float> %passthru, <2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> %passthru)
+  ret <2 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)
+  ret <4 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32_passthru(<4 x float> %passthru, <4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> %passthru)
+  ret <4 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32(<8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vcompress.vm v10, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> undef)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32_passthru(<8 x float> %passthru, <8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v10, v0
+; CHECK-NEXT:    ret
+  %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %ret
+}
+
+define <1 x double> @vector_compress_v1f64(<1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> undef)
+  ret <1 x double> %ret
+}
+
+define <1 x double> @vector_compress_v1f64_passthru(<1 x double> %passthru, <1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> %passthru)
+  ret <1 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64(<2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> undef)
+  ret <2 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64_passthru(<2 x double> %passthru, <2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> %passthru)
+  ret <2 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64(<4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vcompress.vm v10, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> undef)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64_passthru(<4 x double> %passthru, <4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v10, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> %passthru)
+  ret <4 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64(<8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vcompress.vm v12, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> undef)
+  ret <8 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64_passthru(<8 x double> %passthru, <8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v12, v0
+; CHECK-NEXT:    ret
+  %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> %passthru)
+  ret <8 x double> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll
new file mode 100644
index 00000000000000..3952dc31838a2d
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x i8> @vector_compress_v1i8(<1 x i8> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> undef)
+  ret <1 x i8> %ret
+}
+
+define <1 x i8> @vector_compress_v1i8_passthru(<1 x i8> %passthru, <1 x i8> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i8_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> %passthru)
+  ret <1 x i8> %ret
+}
+
+define <2 x i8> @vector_compress_v2i8(<2 x i8> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> undef)
+  ret <2 x i8> %ret
+}
+
+define <2 x i8> @vector_compress_v2i8_passthru(<2 x i8> %passthru, <2 x i8> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i8_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> %passthru)
+  ret <2 x i8> %ret
+}
+
+define <4 x i8> @vector_compress_v4i8(<4 x i8> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> undef)
+  ret <4 x i8> %ret
+}
+
+define <4 x i8> @vector_compress_v4i8_passthru(<4 x i8> %passthru, <4 x i8> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4i8_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> %passthru)
+  ret <4 x i8> %ret
+}
+
+define <8 x i8> @vector_compress_v8i8(<8 x i8> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> undef)
+  ret <8 x i8> %ret
+}
+
+define <8 x i8> @vector_compress_v8i8_passthru(<8 x i8> %passthru, <8 x i8> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8i8_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> %passthru)
+  ret <8 x i8> %ret
+}
+
+define <1 x i16> @vector_compress_v1i16(<1 x i16> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> undef)
+  ret <1 x i16> %ret
+}
+
+define <1 x i16> @vector_compress_v1i16_passthru(<1 x i16> %passthru, <1 x i16> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1i16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> %passthru)
+  ret <1 x i16> %ret
+}
+
+define <2 x i16> @vector_compress_v2i16(<2 x i16> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2i16:
+; CHE...
[truncated]

Collaborator

@preames left a comment

LGTM

@wangpc-pp wangpc-pp merged commit b799cc3 into llvm:main Oct 23, 2024
10 checks passed
@wangpc-pp wangpc-pp deleted the main-riscv-vector-compress branch October 23, 2024 06:23
@frobtech frobtech mentioned this pull request Oct 25, 2024
lukel97 added a commit to lukel97/llvm-project that referenced this pull request Oct 26, 2024
This is a follow up to llvm#113291 and handles f16/bf16 with zvfhmin and zvfbmin.
lukel97 added a commit that referenced this pull request Oct 28, 2024
…#113770)

This is a follow up to #113291 and handles f16/bf16 with zvfhmin and
zvfbmin.
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
…llvm#113770)

This is a follow up to llvm#113291 and handles f16/bf16 with zvfhmin and
zvfbmin.

Successfully merging this pull request may close these issues.

[RISC-V] Remove round-trip to memory when using compressstore