
Commit ec39571

lukel97 and NoumanAmir657 authored and committed
[RISCV] Lower @llvm.experimental.vector.compress for zvfhmin/zvfbfmin (llvm#113770)
This is a follow-up to llvm#113291 and handles f16/bf16 with zvfhmin and zvfbfmin.
1 parent adf04a2 commit ec39571
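
For context, @llvm.experimental.vector.compress packs the lanes of a data vector selected by a mask to the front of the result, with the remaining lanes taken from a passthru operand (undefined when the passthru is undef). A minimal IR sketch of the case this commit enables (the function name is illustrative; the expected vcompress.vm codegen is taken from the tests below):

; Sketch: compress the bf16 lanes of %v selected by %mask.
; With this change, zvfhmin/zvfbfmin targets custom-lower this to a
; single vcompress.vm; previously the custom lowering was only
; registered when the scalar type was fully legal (e.g. f16 with zvfh).
define <vscale x 1 x bfloat> @compress_sketch(<vscale x 1 x bfloat> %v, <vscale x 1 x i1> %mask) {
  %r = call <vscale x 1 x bfloat> @llvm.experimental.vector.compress.nxv1bf16(<vscale x 1 x bfloat> %v, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> undef)
  ret <vscale x 1 x bfloat> %r
}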

File tree

3 files changed: +224 −8 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+4 −4
@@ -1081,7 +1081,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                           ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
-                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE},
+                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+                          ISD::VECTOR_COMPRESS},
                          VT, Custom);
       MVT EltVT = VT.getVectorElementType();
       if (isTypeLegal(EltVT))
@@ -1333,7 +1334,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::UNDEF, VT, Custom);
 
       setOperationAction({ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE,
-                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                          ISD::VECTOR_COMPRESS},
                          VT, Custom);
 
       // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
@@ -1440,8 +1442,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                           ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                           ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                          VT, Custom);
-
-      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     // Custom-legalize bitcasts from fixed-length vectors to scalar types.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll

+88 −2
@@ -1,6 +1,92 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+
+define <1 x bfloat> @vector_compress_v1bf16(<1 x bfloat> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> undef)
+  ret <1 x bfloat> %ret
+}
+
+define <1 x bfloat> @vector_compress_v1bf16_passthru(<1 x bfloat> %passthru, <1 x bfloat> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> %passthru)
+  ret <1 x bfloat> %ret
+}
+
+define <2 x bfloat> @vector_compress_v2bf16(<2 x bfloat> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> undef)
+  ret <2 x bfloat> %ret
+}
+
+define <2 x bfloat> @vector_compress_v2bf16_passthru(<2 x bfloat> %passthru, <2 x bfloat> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> %passthru)
+  ret <2 x bfloat> %ret
+}
+
+define <4 x bfloat> @vector_compress_v4bf16(<4 x bfloat> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> undef)
+  ret <4 x bfloat> %ret
+}
+
+define <4 x bfloat> @vector_compress_v4bf16_passthru(<4 x bfloat> %passthru, <4 x bfloat> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> %passthru)
+  ret <4 x bfloat> %ret
+}
+
+define <8 x bfloat> @vector_compress_v8bf16(<8 x bfloat> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> undef)
+  ret <8 x bfloat> %ret
+}
+
+define <8 x bfloat> @vector_compress_v8bf16_passthru(<8 x bfloat> %passthru, <8 x bfloat> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> %passthru)
+  ret <8 x bfloat> %ret
+}
 
 define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
 ; CHECK-LABEL: vector_compress_v1f16:
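
Note the tail-policy difference visible in the checks above: without a passthru, the compress is emitted tail-agnostic ("ta") into a scratch register and then moved into v8, whereas the passthru variants compress directly into the passthru's register under a tail-undisturbed ("tu") policy, so the lanes past the packed ones keep the passthru values. A minimal side-by-side sketch (hypothetical function name; the expected instructions in the comments are copied from the generated checks above):

define <1 x bfloat> @tail_policy_sketch(<1 x bfloat> %passthru, <1 x bfloat> %v, <1 x i1> %mask) {
  ; undef passthru -> tail-agnostic, plus a register move into v8:
  ;   vsetivli zero, 1, e16, mf4, ta, ma
  ;   vcompress.vm v9, v8, v0
  ;   vmv1r.v v8, v9
  %ta = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> undef)
  ; real passthru -> tail-undisturbed, compressing in place into v8:
  ;   vsetivli zero, 1, e16, mf4, tu, ma
  ;   vcompress.vm v8, v9, v0
  %tu = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> %passthru)
  ret <1 x bfloat> %tu
}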

llvm/test/CodeGen/RISCV/rvv/vector-compress.ll

+132 −2
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
 
 ; Vector compress for i8 type
 
@@ -472,6 +474,134 @@ define <vscale x 8 x i64> @vector_compress_nxv8i64_passthru(<vscale x 8 x i64> %
   ret <vscale x 8 x i64> %ret
 }
 
+; Vector compress for bf16 type
+
+define <vscale x 1 x bfloat> @vector_compress_nxv1bf16(<vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 1 x bfloat> @llvm.experimental.vector.compress.nxv1bf16(<vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> undef)
+  ret <vscale x 1 x bfloat> %ret
+}
+
+define <vscale x 1 x bfloat> @vector_compress_nxv1bf16_passthru(<vscale x 1 x bfloat> %passthru, <vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv1bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 1 x bfloat> @llvm.experimental.vector.compress.nxv1bf16(<vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> %passthru)
+  ret <vscale x 1 x bfloat> %ret
+}
+
+define <vscale x 2 x bfloat> @vector_compress_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 2 x bfloat> @llvm.experimental.vector.compress.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %ret
+}
+
+define <vscale x 2 x bfloat> @vector_compress_nxv2bf16_passthru(<vscale x 2 x bfloat> %passthru, <vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv2bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 2 x bfloat> @llvm.experimental.vector.compress.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> %passthru)
+  ret <vscale x 2 x bfloat> %ret
+}
+
+define <vscale x 4 x bfloat> @vector_compress_nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 4 x bfloat> @llvm.experimental.vector.compress.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
+  ret <vscale x 4 x bfloat> %ret
+}
+
+define <vscale x 4 x bfloat> @vector_compress_nxv4bf16_passthru(<vscale x 4 x bfloat> %passthru, <vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv4bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 4 x bfloat> @llvm.experimental.vector.compress.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> %passthru)
+  ret <vscale x 4 x bfloat> %ret
+}
+
+define <vscale x 8 x bfloat> @vector_compress_nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vcompress.vm v10, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 8 x bfloat> @llvm.experimental.vector.compress.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
+  ret <vscale x 8 x bfloat> %ret
+}
+
+define <vscale x 8 x bfloat> @vector_compress_nxv8bf16_passthru(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv8bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v10, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 8 x bfloat> @llvm.experimental.vector.compress.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %passthru)
+  ret <vscale x 8 x bfloat> %ret
+}
+
+define <vscale x 16 x bfloat> @vector_compress_nxv16bf16(<vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vcompress.vm v12, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 16 x bfloat> @llvm.experimental.vector.compress.nxv16bf16(<vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask, <vscale x 16 x bfloat> undef)
+  ret <vscale x 16 x bfloat> %ret
+}
+
+define <vscale x 16 x bfloat> @vector_compress_nxv16bf16_passthru(<vscale x 16 x bfloat> %passthru, <vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv16bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v12, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 16 x bfloat> @llvm.experimental.vector.compress.nxv16bf16(<vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask, <vscale x 16 x bfloat> %passthru)
+  ret <vscale x 16 x bfloat> %ret
+}
+
+define <vscale x 32 x bfloat> @vector_compress_nxv32bf16(<vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 32 x bfloat> @llvm.experimental.vector.compress.nxv32bf16(<vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask, <vscale x 32 x bfloat> undef)
+  ret <vscale x 32 x bfloat> %ret
+}
+
+define <vscale x 32 x bfloat> @vector_compress_nxv32bf16_passthru(<vscale x 32 x bfloat> %passthru, <vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv32bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v16, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 32 x bfloat> @llvm.experimental.vector.compress.nxv32bf16(<vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask, <vscale x 32 x bfloat> %passthru)
+  ret <vscale x 32 x bfloat> %ret
+}
+
 ; Vector compress for f16 type
 
 define <vscale x 1 x half> @vector_compress_nxv1f16(<vscale x 1 x half> %data, <vscale x 1 x i1> %mask) {
