Skip to content

Commit 7f4422d

Browse files
committed
[AArch64] Add testing for shuffles that extend into new types. NFC
1 parent 34598fd commit 7f4422d

File tree

1 file changed

+264
-0
lines changed

1 file changed

+264
-0
lines changed
+264
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
3+
4+
; Selects element 8 of %a and element 8 of %b (mask index 24 = 16 + 8, so it
; addresses the second 16-element operand) into a <2 x i8>, then adds the
; result to itself. The checked output extracts each byte with umov, places
; them in .s lanes, and performs the add on v0.2s.
define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) {
5+
; CHECK-LABEL: test_v16i8_v2i32_824:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: umov w8, v0.b[8]
8+
; CHECK-NEXT: umov w9, v1.b[8]
9+
; CHECK-NEXT: fmov s0, w8
10+
; CHECK-NEXT: mov v0.s[1], w9
11+
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
12+
; CHECK-NEXT: ret
13+
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <2 x i32> <i32 8, i32 24>
14+
%d = add <2 x i8> %c, %c
15+
ret <2 x i8> %d
16+
}
17+
18+
; Selects element 0 of %a and element 0 of %b (mask index 16 is the first
; element of the second 16-element operand) into a <2 x i8>, then adds the
; result to itself. The checked output extracts each byte with umov, places
; them in .s lanes, and performs the add on v0.2s.
define <2 x i8> @test_v16i8_v2i32_016(<16 x i8> %a, <16 x i8> %b) {
19+
; CHECK-LABEL: test_v16i8_v2i32_016:
20+
; CHECK: // %bb.0:
21+
; CHECK-NEXT: umov w8, v0.b[0]
22+
; CHECK-NEXT: umov w9, v1.b[0]
23+
; CHECK-NEXT: fmov s0, w8
24+
; CHECK-NEXT: mov v0.s[1], w9
25+
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
26+
; CHECK-NEXT: ret
27+
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <2 x i32> <i32 0, i32 16>
28+
%d = add <2 x i8> %c, %c
29+
ret <2 x i8> %d
30+
}
31+
32+
; 64-bit-input variant: selects element 0 of %a and element 0 of %b (mask
; index 8 is the first element of the second 8-element operand) into a
; <2 x i8>, then adds the result to itself. The checked output reads
; v0.b[0] and v1.b[0] with umov and performs the add on v0.2s.
define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) {
33+
; CHECK-LABEL: test_v8i8_v2i32_08:
34+
; CHECK: // %bb.0:
35+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
36+
; CHECK-NEXT: umov w8, v0.b[0]
37+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
38+
; CHECK-NEXT: umov w9, v1.b[0]
39+
; CHECK-NEXT: fmov s0, w8
40+
; CHECK-NEXT: mov v0.s[1], w9
41+
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
42+
; CHECK-NEXT: ret
43+
%c = shufflevector <8 x i8> %a, <8 x i8> %b, <2 x i32> <i32 0, i32 8>
44+
%d = add <2 x i8> %c, %c
45+
ret <2 x i8> %d
46+
}
47+
48+
; Selects element 0 of %a and element 0 of %b (mask index 8 is the first
; element of the second 8-element operand) into a <2 x i16>, then adds the
; result to itself. The checked output extracts each halfword with umov,
; places them in .s lanes, and performs the add on v0.2s.
define <2 x i16> @test_v8i16_v2i32_08(<8 x i16> %a, <8 x i16> %b) {
49+
; CHECK-LABEL: test_v8i16_v2i32_08:
50+
; CHECK: // %bb.0:
51+
; CHECK-NEXT: umov w8, v0.h[0]
52+
; CHECK-NEXT: umov w9, v1.h[0]
53+
; CHECK-NEXT: fmov s0, w8
54+
; CHECK-NEXT: mov v0.s[1], w9
55+
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
56+
; CHECK-NEXT: ret
57+
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <2 x i32> <i32 0, i32 8>
58+
%d = add <2 x i16> %c, %c
59+
ret <2 x i16> %d
60+
}
61+
62+
; 64-bit-input variant: selects element 0 of %a and element 0 of %b (mask
; index 4 is the first element of the second 4-element operand) into a
; <2 x i16>, then adds the result to itself. The checked output reads
; v0.h[0] and v1.h[0] with umov and performs the add on v0.2s.
define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) {
63+
; CHECK-LABEL: test_v4i16_v2i32_04:
64+
; CHECK: // %bb.0:
65+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
66+
; CHECK-NEXT: umov w8, v0.h[0]
67+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
68+
; CHECK-NEXT: umov w9, v1.h[0]
69+
; CHECK-NEXT: fmov s0, w8
70+
; CHECK-NEXT: mov v0.s[1], w9
71+
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
72+
; CHECK-NEXT: ret
73+
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <2 x i32> <i32 0, i32 4>
74+
%d = add <2 x i16> %c, %c
75+
ret <2 x i16> %d
76+
}
77+
78+
79+
; Four-element variant: the mask <8, 24, 0, 16> picks %a[8], %b[8], %a[0],
; %b[0] (indices >= 16 address the second operand) into a <4 x i8>, then adds
; the result to itself. The checked output extracts each byte with umov,
; inserts them into the .h lanes of v2, and performs the add on .4h.
define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) {
80+
; CHECK-LABEL: test_v16i8_v4i16_824:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: umov w8, v0.b[8]
83+
; CHECK-NEXT: umov w9, v1.b[8]
84+
; CHECK-NEXT: fmov s2, w8
85+
; CHECK-NEXT: umov w8, v0.b[0]
86+
; CHECK-NEXT: mov v2.h[1], w9
87+
; CHECK-NEXT: mov v2.h[2], w8
88+
; CHECK-NEXT: umov w8, v1.b[0]
89+
; CHECK-NEXT: mov v2.h[3], w8
90+
; CHECK-NEXT: add v0.4h, v2.4h, v2.4h
91+
; CHECK-NEXT: ret
92+
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <4 x i32> <i32 8, i32 24, i32 0, i32 16>
93+
%d = add <4 x i8> %c, %c
94+
ret <4 x i8> %d
95+
}
96+
97+
; Four-element variant: the mask <0, 16, 4, 20> picks %a[0], %b[0], %a[4],
; %b[4] (indices >= 16 address the second operand) into a <4 x i8>, then adds
; the result to itself. The checked output extracts each byte with umov,
; inserts them into the .h lanes of v2, and performs the add on .4h.
define <4 x i8> @test_v16i8_v4i16_016(<16 x i8> %a, <16 x i8> %b) {
98+
; CHECK-LABEL: test_v16i8_v4i16_016:
99+
; CHECK: // %bb.0:
100+
; CHECK-NEXT: umov w8, v0.b[0]
101+
; CHECK-NEXT: umov w9, v1.b[0]
102+
; CHECK-NEXT: fmov s2, w8
103+
; CHECK-NEXT: umov w8, v0.b[4]
104+
; CHECK-NEXT: mov v2.h[1], w9
105+
; CHECK-NEXT: mov v2.h[2], w8
106+
; CHECK-NEXT: umov w8, v1.b[4]
107+
; CHECK-NEXT: mov v2.h[3], w8
108+
; CHECK-NEXT: add v0.4h, v2.4h, v2.4h
109+
; CHECK-NEXT: ret
110+
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <4 x i32> <i32 0, i32 16, i32 4, i32 20>
111+
%d = add <4 x i8> %c, %c
112+
ret <4 x i8> %d
113+
}
114+
115+
; 64-bit-input, four-element variant: the mask <0, 8, 4, 12> picks %a[0],
; %b[0], %a[4], %b[4] (indices >= 8 address the second operand) into a
; <4 x i8>, then adds the result to itself. The checked output extracts each
; byte with umov, inserts them into the .h lanes of v2, and adds on .4h.
define <4 x i8> @test_v8i8_v4i16_08(<8 x i8> %a, <8 x i8> %b) {
116+
; CHECK-LABEL: test_v8i8_v4i16_08:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
119+
; CHECK-NEXT: umov w8, v0.b[0]
120+
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
121+
; CHECK-NEXT: umov w9, v1.b[0]
122+
; CHECK-NEXT: fmov s2, w8
123+
; CHECK-NEXT: umov w8, v0.b[4]
124+
; CHECK-NEXT: mov v2.h[1], w9
125+
; CHECK-NEXT: mov v2.h[2], w8
126+
; CHECK-NEXT: umov w8, v1.b[4]
127+
; CHECK-NEXT: mov v2.h[3], w8
128+
; CHECK-NEXT: add v0.4h, v2.4h, v2.4h
129+
; CHECK-NEXT: ret
130+
%c = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 8, i32 4, i32 12>
131+
%d = add <4 x i8> %c, %c
132+
ret <4 x i8> %d
133+
}
134+
135+
; Shuffle whose result element type equals the source element type: the mask
; <0, 7, 6, 12> picks %a[0], %a[7], %a[6], %b[4] (indices >= 8 address the
; second operand) into a <4 x i16>, then adds the result to itself. The
; checked output uses a two-register tbl with an index vector loaded from
; the constant pool (.LCPI8_0).
; NOTE(review): the `_08` suffix does not match the mask <0, 7, 6, 12> used
; below; confirm whether this irregular mask is intentional.
define <4 x i16> @test_v8i16_v4i16_08(<8 x i16> %a, <8 x i16> %b) {
136+
; CHECK-LABEL: test_v8i16_v4i16_08:
137+
; CHECK: // %bb.0:
138+
; CHECK-NEXT: adrp x8, .LCPI8_0
139+
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
140+
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
141+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
142+
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
143+
; CHECK-NEXT: add v0.4h, v0.4h, v0.4h
144+
; CHECK-NEXT: ret
145+
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 7, i32 6, i32 12>
146+
%d = add <4 x i16> %c, %c
147+
ret <4 x i16> %d
148+
}
149+
150+
; Same-type shuffle on 64-bit vectors: the mask <0, 3, 5, 6> picks %a[0],
; %a[3], %b[1], %b[2] (indices >= 4 address the second operand) into a
; <4 x i16>, then adds the result to itself. The checked output is an
; ext / trn2 / ext sequence followed by the .4h add.
; NOTE(review): the `_04` suffix does not match the mask <0, 3, 5, 6> used
; below; confirm whether this irregular mask is intentional.
define <4 x i16> @test_v4i16_v4i16_04(<4 x i16> %a, <4 x i16> %b) {
151+
; CHECK-LABEL: test_v4i16_v4i16_04:
152+
; CHECK: // %bb.0:
153+
; CHECK-NEXT: ext v1.8b, v1.8b, v0.8b, #2
154+
; CHECK-NEXT: trn2 v0.4h, v1.4h, v0.4h
155+
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
156+
; CHECK-NEXT: add v0.4h, v0.4h, v0.4h
157+
; CHECK-NEXT: ret
158+
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 3, i32 5, i32 6>
159+
%d = add <4 x i16> %c, %c
160+
ret <4 x i16> %d
161+
}
162+
163+
164+
; Scalar-extract form of a two-sided comparison test.
;   %add.ptr : pointer to the <2 x i64> value being tested
;   %result  : pointer that receives (value - %lo) + 1
;   %hi, %lo : per-lane bounds
; Returns true when no lane satisfies value > %hi and no lane satisfies
; value < %lo. Each comparison is reduced to a 2-bit mask by extracting bytes
; 0 and 8 of the sign-extended compare result as scalars.
define i1 @test1(ptr %add.ptr, ptr %result, <2 x i64> %hi, <2 x i64> %lo) {
165+
; CHECK-LABEL: test1:
166+
; CHECK: // %bb.0:
167+
; CHECK-NEXT: ldr q2, [x0]
168+
; CHECK-NEXT: movi v3.16b, #1
169+
; CHECK-NEXT: mov w12, #1 // =0x1
170+
; CHECK-NEXT: cmgt v0.2d, v2.2d, v0.2d
171+
; CHECK-NEXT: cmgt v4.2d, v1.2d, v2.2d
172+
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
173+
; CHECK-NEXT: and v3.16b, v4.16b, v3.16b
174+
; CHECK-NEXT: umov w8, v0.b[8]
175+
; CHECK-NEXT: umov w9, v3.b[8]
176+
; CHECK-NEXT: umov w10, v0.b[0]
177+
; CHECK-NEXT: umov w11, v3.b[0]
178+
; CHECK-NEXT: sub v0.2d, v2.2d, v1.2d
179+
; CHECK-NEXT: dup v1.2d, x12
180+
; CHECK-NEXT: orr w8, w8, w9
181+
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
182+
; CHECK-NEXT: orr w8, w10, w8, lsl #1
183+
; CHECK-NEXT: orr w8, w8, w11
184+
; CHECK-NEXT: str q0, [x1]
185+
; CHECK-NEXT: tst w8, #0x3
186+
; CHECK-NEXT: cset w0, eq
187+
; CHECK-NEXT: ret
188+
; Load the value that is compared against both bounds.
%19 = load <2 x i64>, ptr %add.ptr, align 8
189+
; Lane-wise value > %hi, widened to all-ones / all-zeros i64 lanes and viewed as 16 bytes.
%cmp = icmp sgt <2 x i64> %19, %hi
190+
%sext = sext <2 x i1> %cmp to <2 x i64>
191+
%20 = bitcast <2 x i64> %sext to <16 x i8>
192+
; Keep bit 0 of bytes 0 and 8 (one byte per i64 lane); the other mask bytes are poison.
%21 = and <16 x i8> %20, <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
193+
; Pack the two per-lane bits into one i8: byte 0 supplies bit 0, byte 8 supplies bit 1.
%storedv = extractelement <16 x i8> %21, i64 0
194+
%storedv.1 = extractelement <16 x i8> %21, i64 8
195+
%22 = shl nuw nsw i8 %storedv.1, 1
196+
%or.111 = or disjoint i8 %22, %storedv
197+
; Same sequence for value < %lo.
%cmp101 = icmp slt <2 x i64> %19, %lo
198+
%sext102 = sext <2 x i1> %cmp101 to <2 x i64>
199+
%23 = bitcast <2 x i64> %sext102 to <16 x i8>
200+
%24 = and <16 x i8> %23, <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
201+
%storedv104 = extractelement <16 x i8> %24, i64 0
202+
%storedv.1105 = extractelement <16 x i8> %24, i64 8
203+
%25 = shl nuw nsw i8 %storedv.1105, 1
204+
%or.111106 = or disjoint i8 %25, %storedv104
205+
; Store (value - %lo) + 1 to %result.
%reass.sub = sub <2 x i64> %19, %lo
206+
%add = add <2 x i64> %reass.sub, splat (i64 1)
207+
store <2 x i64> %add, ptr %result, align 8
208+
; Return true when neither packed comparison mask has a bit set.
%or118 = or i8 %or.111, %or.111106
209+
%cmp24.not = icmp eq i8 %or118, 0
210+
ret i1 %cmp24.not
211+
}
212+
213+
; Shuffle-based form of the computation in test1: the bytes at positions 8
; and 0 of both masked compare results are gathered with shufflevector into
; <2 x i8> vectors, combined with a vector shl/or, and only then extracted
; as scalars.
;   %add.ptr : pointer to the <2 x i64> value being tested
;   %result  : pointer that receives (value - %lo) + 1
;   %hi, %lo : per-lane bounds
; Returns true when the OR of both combined mask bytes is zero.
define i1 @test2(ptr %add.ptr, ptr %result, <2 x i64> %hi, <2 x i64> %lo) {
214+
; CHECK-LABEL: test2:
215+
; CHECK: // %bb.0:
216+
; CHECK-NEXT: ldr q2, [x0]
217+
; CHECK-NEXT: movi v3.16b, #1
218+
; CHECK-NEXT: cmgt v0.2d, v2.2d, v0.2d
219+
; CHECK-NEXT: cmgt v4.2d, v1.2d, v2.2d
220+
; CHECK-NEXT: sub v1.2d, v2.2d, v1.2d
221+
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
222+
; CHECK-NEXT: and v3.16b, v4.16b, v3.16b
223+
; CHECK-NEXT: umov w8, v0.b[8]
224+
; CHECK-NEXT: umov w9, v3.b[8]
225+
; CHECK-NEXT: umov w10, v0.b[0]
226+
; CHECK-NEXT: fmov s0, w8
227+
; CHECK-NEXT: umov w8, v3.b[0]
228+
; CHECK-NEXT: fmov s3, w10
229+
; CHECK-NEXT: mov v0.s[1], w9
230+
; CHECK-NEXT: mov w9, #1 // =0x1
231+
; CHECK-NEXT: mov v3.s[1], w8
232+
; CHECK-NEXT: dup v2.2d, x9
233+
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
234+
; CHECK-NEXT: orr v0.8b, v0.8b, v3.8b
235+
; CHECK-NEXT: mov w8, v0.s[1]
236+
; CHECK-NEXT: fmov w9, s0
237+
; CHECK-NEXT: add v0.2d, v1.2d, v2.2d
238+
; CHECK-NEXT: str q0, [x1]
239+
; CHECK-NEXT: orr w8, w9, w8
240+
; CHECK-NEXT: tst w8, #0xff
241+
; CHECK-NEXT: cset w0, eq
242+
; CHECK-NEXT: ret
243+
; Load the value that is compared against both bounds.
%1 = load <2 x i64>, ptr %add.ptr, align 8
244+
; Lane-wise value > %hi as a byte vector, keeping bit 0 of bytes 0 and 8.
%cmp = icmp sgt <2 x i64> %1, %hi
245+
%sext = sext <2 x i1> %cmp to <2 x i64>
246+
%2 = bitcast <2 x i64> %sext to <16 x i8>
247+
%3 = and <16 x i8> %2, <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
248+
; Lane-wise value < %lo, masked the same way.
%cmp101 = icmp slt <2 x i64> %1, %lo
249+
%sext102 = sext <2 x i1> %cmp101 to <2 x i64>
250+
%4 = bitcast <2 x i64> %sext102 to <16 x i8>
251+
%5 = and <16 x i8> %4, <i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 1, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>
252+
; Gather byte 8 of each mask (index 24 = byte 8 of %5) and shift it into bit 1.
%6 = shufflevector <16 x i8> %3, <16 x i8> %5, <2 x i32> <i32 8, i32 24>
253+
%7 = shl nuw nsw <2 x i8> %6, splat (i8 1)
254+
; Gather byte 0 of each mask (index 16 = byte 0 of %5) and merge it into bit 0.
%8 = shufflevector <16 x i8> %3, <16 x i8> %5, <2 x i32> <i32 0, i32 16>
255+
%9 = or disjoint <2 x i8> %7, %8
256+
; Store (value - %lo) + 1 to %result.
%reass.sub = sub <2 x i64> %1, %lo
257+
%add = add <2 x i64> %reass.sub, splat (i64 1)
258+
store <2 x i64> %add, ptr %result, align 8
259+
; Return true when neither combined mask byte has a bit set.
%10 = extractelement <2 x i8> %9, i32 0
260+
%11 = extractelement <2 x i8> %9, i32 1
261+
%or118 = or i8 %10, %11
262+
%cmp24.not = icmp eq i8 %or118, 0
263+
ret i1 %cmp24.not
264+
}

0 commit comments

Comments
 (0)