Skip to content

Commit 22f423a

Browse files
committed
[ARM] Add some extra testing for MVE postinc loops. NFC
1 parent 34415fd commit 22f423a

File tree

1 file changed

+142
-0
lines changed

1 file changed

+142
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
3+
4+
; useafterloop: a fixed 64-iteration loop (%blkCnt.09 counts down from 64)
; that loads a <4 x float> from %pSrcA and from %pSrcB, adds them, and stores
; the sum to %pDst, advancing all three pointers by 4 floats per iteration.
; The unmodified %pDst argument is the return value, so it is still needed
; after the loop. %blockSize is not referenced in the body.
; In the checked output below, both loads are post-indexed ([rN], #16), while
; the store address is formed as r12 + r3 with a separate increment of r3.
define nonnull ptr @useafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) {
5+
; CHECK-LABEL: useafterloop:
6+
; CHECK: @ %bb.0: @ %entry
7+
; CHECK-NEXT: .save {r7, lr}
8+
; CHECK-NEXT: push {r7, lr}
9+
; CHECK-NEXT: mov.w lr, #64
10+
; CHECK-NEXT: mov r12, r2
11+
; CHECK-NEXT: movs r3, #0
12+
; CHECK-NEXT: .LBB0_1: @ %while.body
13+
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
14+
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
15+
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
16+
; CHECK-NEXT: add.w r2, r12, r3
17+
; CHECK-NEXT: adds r3, #16
18+
; CHECK-NEXT: vadd.f32 q0, q1, q0
19+
; CHECK-NEXT: vstrw.32 q0, [r2]
20+
; CHECK-NEXT: le lr, .LBB0_1
21+
; CHECK-NEXT: @ %bb.2: @ %while.end
22+
; CHECK-NEXT: mov r0, r12
23+
; CHECK-NEXT: pop {r7, pc}
24+
entry:
25+
br label %while.body
26+
27+
; Loop-carried source/destination pointers and the down-counter.
while.body:
28+
%pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
29+
%pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
30+
%pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
31+
%blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
32+
%0 = load <4 x float>, ptr %pSrcA.addr.012, align 4
33+
%1 = load <4 x float>, ptr %pSrcB.addr.011, align 4
34+
%2 = fadd fast <4 x float> %1, %0
35+
store <4 x float> %2, ptr %pDst.addr.010, align 4
36+
; Step each pointer by 4 floats, i.e. one <4 x float> vector.
%add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4
37+
%add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4
38+
%add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4
39+
%dec = add nsw i32 %blkCnt.09, -1
40+
%cmp.not = icmp eq i32 %dec, 0
41+
br i1 %cmp.not, label %while.end, label %while.body
42+
43+
; The only value used after the loop is the original %pDst argument.
while.end:
44+
ret ptr %pDst
45+
}
46+
47+
48+
; nouse: same 64-iteration <4 x float> add loop as above (two loads, fadd,
; one store, each pointer advanced by 4 floats per iteration), but the return
; value is an address one float past the original %pDst rather than %pDst
; itself. %blockSize is not referenced in the body.
; In the checked output below, the two loads and the store are all
; post-indexed ([rN], #16); the store walks r3, a copy of r2, and the return
; value is computed from r2 as r2 + 4.
define nonnull ptr @nouse(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) {
49+
; CHECK-LABEL: nouse:
50+
; CHECK: @ %bb.0: @ %entry
51+
; CHECK-NEXT: .save {r7, lr}
52+
; CHECK-NEXT: push {r7, lr}
53+
; CHECK-NEXT: mov.w lr, #64
54+
; CHECK-NEXT: mov r3, r2
55+
; CHECK-NEXT: .LBB1_1: @ %while.body
56+
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
57+
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
58+
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
59+
; CHECK-NEXT: vadd.f32 q0, q1, q0
60+
; CHECK-NEXT: vstrb.8 q0, [r3], #16
61+
; CHECK-NEXT: le lr, .LBB1_1
62+
; CHECK-NEXT: @ %bb.2: @ %while.end
63+
; CHECK-NEXT: adds r0, r2, #4
64+
; CHECK-NEXT: pop {r7, pc}
65+
entry:
66+
br label %while.body
67+
68+
; Loop-carried source/destination pointers and the down-counter.
while.body:
69+
%pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
70+
%pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
71+
%pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
72+
%blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
73+
%0 = load <4 x float>, ptr %pSrcA.addr.012, align 4
74+
%1 = load <4 x float>, ptr %pSrcB.addr.011, align 4
75+
%2 = fadd fast <4 x float> %1, %0
76+
store <4 x float> %2, ptr %pDst.addr.010, align 4
77+
; Step each pointer by 4 floats, i.e. one <4 x float> vector.
%add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4
78+
%add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4
79+
%add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4
80+
%dec = add nsw i32 %blkCnt.09, -1
81+
%cmp.not = icmp eq i32 %dec, 0
82+
br i1 %cmp.not, label %while.end, label %while.body
83+
84+
; Return the address one float past the original %pDst argument.
while.end:
85+
%add.ptr3 = getelementptr inbounds float, ptr %pDst, i32 1
86+
ret ptr %add.ptr3
87+
}
88+
89+
; manyusesafterloop: same 64-iteration <4 x float> add loop (two loads, fadd,
; one store, each pointer advanced by 4 floats per iteration). After the loop
; it loads one float through each of the original %pDst, %pSrcA and %pSrcB
; arguments and returns their sum, so all three base pointers are needed
; after the loop. %blockSize is not referenced in the body.
; In the checked output below, none of the vector accesses is post-indexed:
; each address is formed by adding the shared offset r3 to r0, r1 or r2, and
; r0-r2 are then used for the scalar loads after the loop.
define nofpclass(nan inf) float @manyusesafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr nocapture noundef %pDst, i32 noundef %blockSize) {
90+
; CHECK-LABEL: manyusesafterloop:
91+
; CHECK: @ %bb.0: @ %entry
92+
; CHECK-NEXT: .save {r4, lr}
93+
; CHECK-NEXT: push {r4, lr}
94+
; CHECK-NEXT: mov.w lr, #64
95+
; CHECK-NEXT: movs r3, #0
96+
; CHECK-NEXT: .LBB2_1: @ %while.body
97+
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
98+
; CHECK-NEXT: add.w r12, r0, r3
99+
; CHECK-NEXT: adds r4, r1, r3
100+
; CHECK-NEXT: vldrw.u32 q1, [r4]
101+
; CHECK-NEXT: vldrw.u32 q0, [r12]
102+
; CHECK-NEXT: adds r4, r2, r3
103+
; CHECK-NEXT: adds r3, #16
104+
; CHECK-NEXT: vadd.f32 q0, q1, q0
105+
; CHECK-NEXT: vstrw.32 q0, [r4]
106+
; CHECK-NEXT: le lr, .LBB2_1
107+
; CHECK-NEXT: @ %bb.2: @ %while.end
108+
; CHECK-NEXT: vldr s0, [r2]
109+
; CHECK-NEXT: vldr s2, [r0]
110+
; CHECK-NEXT: vadd.f32 s0, s2, s0
111+
; CHECK-NEXT: vldr s2, [r1]
112+
; CHECK-NEXT: vadd.f32 s0, s0, s2
113+
; CHECK-NEXT: vmov r0, s0
114+
; CHECK-NEXT: pop {r4, pc}
115+
entry:
116+
br label %while.body
117+
118+
; Loop-carried source/destination pointers and the down-counter.
while.body:
119+
%pSrcA.addr.016 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ]
120+
%pSrcB.addr.015 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ]
121+
%pDst.addr.014 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ]
122+
%blkCnt.013 = phi i32 [ 64, %entry ], [ %dec, %while.body ]
123+
%0 = load <4 x float>, ptr %pSrcA.addr.016, align 4
124+
%1 = load <4 x float>, ptr %pSrcB.addr.015, align 4
125+
%2 = fadd fast <4 x float> %1, %0
126+
store <4 x float> %2, ptr %pDst.addr.014, align 4
127+
; Step each pointer by 4 floats, i.e. one <4 x float> vector.
%add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.016, i32 4
128+
%add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.015, i32 4
129+
%add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.014, i32 4
130+
%dec = add nsw i32 %blkCnt.013, -1
131+
%cmp.not = icmp eq i32 %dec, 0
132+
br i1 %cmp.not, label %while.end, label %while.body
133+
134+
; Read one float through each original argument pointer and sum the three.
while.end:
135+
%3 = load float, ptr %pDst, align 4
136+
%4 = load float, ptr %pSrcA, align 4
137+
%add = fadd fast float %4, %3
138+
%5 = load float, ptr %pSrcB, align 4
139+
%add5 = fadd fast float %add, %5
140+
ret float %add5
141+
}
142+

0 commit comments

Comments
 (0)