Skip to content

Commit a3be17e

Browse files
committed
[AArch64] Expand v8i8 cttz (PR39729)
Fix for https://bugs.llvm.org/show_bug.cgi?id=39729. Rather than adding just a case for v8i8, I'm setting cttz to expand for all vector types. Differential Revision: https://reviews.llvm.org/D58008. llvm-svn: 353872
1 parent 7403fac commit a3be17e

File tree

2 files changed

+125
-9
lines changed

2 files changed

+125
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+1-9
Original file line number | Diff line number | Diff line change
@@ -681,15 +681,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
681681
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
682682
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
683683

684-
setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
685-
setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
686-
setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
687-
setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
688-
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
689-
setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
690-
setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
691-
setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
692-
693684
// AArch64 doesn't have MUL.2d:
694685
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
695686
// Custom handling for some quad-vector types to detect MULL.
@@ -728,6 +719,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
728719
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
729720

730721
setOperationAction(ISD::BSWAP, VT, Expand);
722+
setOperationAction(ISD::CTTZ, VT, Expand);
731723

732724
for (MVT InnerVT : MVT::vector_valuetypes()) {
733725
setTruncStoreAction(VT, InnerVT, Expand);

llvm/test/CodeGen/AArch64/vec_cttz.ll

+124
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,124 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
3+
4+
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
5+
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
6+
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
7+
declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
8+
9+
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
10+
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
11+
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
12+
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
13+
14+
define <8 x i8> @cttz_v8i8(<8 x i8> %a) nounwind {
15+
; CHECK-LABEL: cttz_v8i8:
16+
; CHECK: // %bb.0:
17+
; CHECK-NEXT: movi v1.8b, #1
18+
; CHECK-NEXT: sub v1.8b, v0.8b, v1.8b
19+
; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
20+
; CHECK-NEXT: cnt v0.8b, v0.8b
21+
; CHECK-NEXT: ret
22+
%b = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
23+
ret <8 x i8> %b
24+
}
25+
26+
define <4 x i16> @cttz_v4i16(<4 x i16> %a) nounwind {
27+
; CHECK-LABEL: cttz_v4i16:
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: movi v1.4h, #1
30+
; CHECK-NEXT: sub v1.4h, v0.4h, v1.4h
31+
; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
32+
; CHECK-NEXT: clz v0.4h, v0.4h
33+
; CHECK-NEXT: movi v1.4h, #16
34+
; CHECK-NEXT: sub v0.4h, v1.4h, v0.4h
35+
; CHECK-NEXT: ret
36+
%b = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
37+
ret <4 x i16> %b
38+
}
39+
40+
define <2 x i32> @cttz_v2i32(<2 x i32> %a) nounwind {
41+
; CHECK-LABEL: cttz_v2i32:
42+
; CHECK: // %bb.0:
43+
; CHECK-NEXT: movi v1.2s, #1
44+
; CHECK-NEXT: sub v1.2s, v0.2s, v1.2s
45+
; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
46+
; CHECK-NEXT: clz v0.2s, v0.2s
47+
; CHECK-NEXT: movi v1.2s, #32
48+
; CHECK-NEXT: sub v0.2s, v1.2s, v0.2s
49+
; CHECK-NEXT: ret
50+
%b = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
51+
ret <2 x i32> %b
52+
}
53+
54+
define <1 x i64> @cttz_v1i64(<1 x i64> %a) nounwind {
55+
; CHECK-LABEL: cttz_v1i64:
56+
; CHECK: // %bb.0:
57+
; CHECK-NEXT: orr w8, wzr, #0x1
58+
; CHECK-NEXT: fmov d1, x8
59+
; CHECK-NEXT: sub d1, d0, d1
60+
; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
61+
; CHECK-NEXT: cnt v0.8b, v0.8b
62+
; CHECK-NEXT: uaddlp v0.4h, v0.8b
63+
; CHECK-NEXT: uaddlp v0.2s, v0.4h
64+
; CHECK-NEXT: uaddlp v0.1d, v0.2s
65+
; CHECK-NEXT: ret
66+
%b = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
67+
ret <1 x i64> %b
68+
}
69+
70+
define <16 x i8> @cttz_v16i8(<16 x i8> %a) nounwind {
71+
; CHECK-LABEL: cttz_v16i8:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: movi v1.16b, #1
74+
; CHECK-NEXT: sub v1.16b, v0.16b, v1.16b
75+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
76+
; CHECK-NEXT: cnt v0.16b, v0.16b
77+
; CHECK-NEXT: ret
78+
%b = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
79+
ret <16 x i8> %b
80+
}
81+
82+
define <8 x i16> @cttz_v8i16(<8 x i16> %a) nounwind {
83+
; CHECK-LABEL: cttz_v8i16:
84+
; CHECK: // %bb.0:
85+
; CHECK-NEXT: movi v1.8h, #1
86+
; CHECK-NEXT: sub v1.8h, v0.8h, v1.8h
87+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
88+
; CHECK-NEXT: clz v0.8h, v0.8h
89+
; CHECK-NEXT: movi v1.8h, #16
90+
; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
91+
; CHECK-NEXT: ret
92+
%b = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
93+
ret <8 x i16> %b
94+
}
95+
96+
define <4 x i32> @cttz_v4i32(<4 x i32> %a) nounwind {
97+
; CHECK-LABEL: cttz_v4i32:
98+
; CHECK: // %bb.0:
99+
; CHECK-NEXT: movi v1.4s, #1
100+
; CHECK-NEXT: sub v1.4s, v0.4s, v1.4s
101+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
102+
; CHECK-NEXT: clz v0.4s, v0.4s
103+
; CHECK-NEXT: movi v1.4s, #32
104+
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
105+
; CHECK-NEXT: ret
106+
%b = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
107+
ret <4 x i32> %b
108+
}
109+
110+
define <2 x i64> @cttz_v2i64(<2 x i64> %a) nounwind {
111+
; CHECK-LABEL: cttz_v2i64:
112+
; CHECK: // %bb.0:
113+
; CHECK-NEXT: orr w8, wzr, #0x1
114+
; CHECK-NEXT: dup v1.2d, x8
115+
; CHECK-NEXT: sub v1.2d, v0.2d, v1.2d
116+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
117+
; CHECK-NEXT: cnt v0.16b, v0.16b
118+
; CHECK-NEXT: uaddlp v0.8h, v0.16b
119+
; CHECK-NEXT: uaddlp v0.4s, v0.8h
120+
; CHECK-NEXT: uaddlp v0.2d, v0.4s
121+
; CHECK-NEXT: ret
122+
%b = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
123+
ret <2 x i64> %b
124+
}

0 commit comments

Comments
 (0)