|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| 2 | +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s |
| 3 | + |
| | +; useafterloop: the loop runs 64 times (%blkCnt.09 starts at 64 and is |
| | +; decremented until it reaches 0). Each iteration loads a <4 x float> from |
| | +; %pSrcA and from %pSrcB, adds them, stores the sum to %pDst, and advances all |
| | +; three pointers by 4 floats. %blockSize is never read. |
| | +; The incoming %pDst is returned after the loop. In the checked output the two |
| | +; loads are post-incrementing (`[r0], #16`, `[r1], #16`), while the store address |
| | +; is formed as r12 + r3 with r3 stepped by 16, and r12 (copied from r2 in the |
| | +; entry block) is moved to r0 for the return. |
| 4 | +define nonnull ptr @useafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) { |
| 5 | +; CHECK-LABEL: useafterloop: |
| 6 | +; CHECK: @ %bb.0: @ %entry |
| 7 | +; CHECK-NEXT: .save {r7, lr} |
| 8 | +; CHECK-NEXT: push {r7, lr} |
| 9 | +; CHECK-NEXT: mov.w lr, #64 |
| 10 | +; CHECK-NEXT: mov r12, r2 |
| 11 | +; CHECK-NEXT: movs r3, #0 |
| 12 | +; CHECK-NEXT: .LBB0_1: @ %while.body |
| 13 | +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| 14 | +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 |
| 15 | +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 |
| 16 | +; CHECK-NEXT: add.w r2, r12, r3 |
| 17 | +; CHECK-NEXT: adds r3, #16 |
| 18 | +; CHECK-NEXT: vadd.f32 q0, q1, q0 |
| 19 | +; CHECK-NEXT: vstrw.32 q0, [r2] |
| 20 | +; CHECK-NEXT: le lr, .LBB0_1 |
| 21 | +; CHECK-NEXT: @ %bb.2: @ %while.end |
| 22 | +; CHECK-NEXT: mov r0, r12 |
| 23 | +; CHECK-NEXT: pop {r7, pc} |
| 24 | +entry: |
| 25 | + br label %while.body |
| 26 | + |
| 27 | +while.body: |
| 28 | + %pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ] |
| 29 | + %pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ] |
| 30 | + %pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ] |
| 31 | + %blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ] |
| 32 | + %0 = load <4 x float>, ptr %pSrcA.addr.012, align 4 |
| 33 | + %1 = load <4 x float>, ptr %pSrcB.addr.011, align 4 |
| 34 | + %2 = fadd fast <4 x float> %1, %0 |
| 35 | + store <4 x float> %2, ptr %pDst.addr.010, align 4 |
| | +; Advance each pointer by 4 floats, i.e. one <4 x float> vector. |
| 36 | + %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4 |
| 37 | + %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4 |
| 38 | + %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4 |
| 39 | + %dec = add nsw i32 %blkCnt.09, -1 |
| 40 | + %cmp.not = icmp eq i32 %dec, 0 |
| 41 | + br i1 %cmp.not, label %while.end, label %while.body |
| 42 | + |
| 43 | +while.end: |
| | +; Returns the incoming %pDst argument, not the advanced %add.ptr2. |
| 44 | + ret ptr %pDst |
| 45 | +} |
| 46 | + |
| 47 | + |
| | +; nouse: the loop runs 64 times (%blkCnt.09 counts down from 64). Each iteration |
| | +; loads a <4 x float> from %pSrcA and from %pSrcB, adds them, stores the sum to |
| | +; %pDst, and advances all three pointers by 4 floats. %blockSize is never read. |
| | +; After the loop the function returns %pDst advanced by one float |
| | +; (`adds r0, r2, #4` in the checked output); %pDst itself is not returned. |
| | +; In the checked output the loads and the store are all post-incrementing; the |
| | +; store increments r3, a copy of r2 made in the entry block, so r2 is still |
| | +; available after the loop. |
| 48 | +define nonnull ptr @nouse(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr noundef writeonly %pDst, i32 noundef %blockSize) { |
| 49 | +; CHECK-LABEL: nouse: |
| 50 | +; CHECK: @ %bb.0: @ %entry |
| 51 | +; CHECK-NEXT: .save {r7, lr} |
| 52 | +; CHECK-NEXT: push {r7, lr} |
| 53 | +; CHECK-NEXT: mov.w lr, #64 |
| 54 | +; CHECK-NEXT: mov r3, r2 |
| 55 | +; CHECK-NEXT: .LBB1_1: @ %while.body |
| 56 | +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| 57 | +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 |
| 58 | +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 |
| 59 | +; CHECK-NEXT: vadd.f32 q0, q1, q0 |
| 60 | +; CHECK-NEXT: vstrb.8 q0, [r3], #16 |
| 61 | +; CHECK-NEXT: le lr, .LBB1_1 |
| 62 | +; CHECK-NEXT: @ %bb.2: @ %while.end |
| 63 | +; CHECK-NEXT: adds r0, r2, #4 |
| 64 | +; CHECK-NEXT: pop {r7, pc} |
| 65 | +entry: |
| 66 | + br label %while.body |
| 67 | + |
| 68 | +while.body: |
| 69 | + %pSrcA.addr.012 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ] |
| 70 | + %pSrcB.addr.011 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ] |
| 71 | + %pDst.addr.010 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ] |
| 72 | + %blkCnt.09 = phi i32 [ 64, %entry ], [ %dec, %while.body ] |
| 73 | + %0 = load <4 x float>, ptr %pSrcA.addr.012, align 4 |
| 74 | + %1 = load <4 x float>, ptr %pSrcB.addr.011, align 4 |
| 75 | + %2 = fadd fast <4 x float> %1, %0 |
| 76 | + store <4 x float> %2, ptr %pDst.addr.010, align 4 |
| | +; Advance each pointer by 4 floats, i.e. one <4 x float> vector. |
| 77 | + %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.012, i32 4 |
| 78 | + %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.011, i32 4 |
| 79 | + %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.010, i32 4 |
| 80 | + %dec = add nsw i32 %blkCnt.09, -1 |
| 81 | + %cmp.not = icmp eq i32 %dec, 0 |
| 82 | + br i1 %cmp.not, label %while.end, label %while.body |
| 83 | + |
| 84 | +while.end: |
| | +; The result is computed from the incoming %pDst (one float past it), not from |
| | +; the loop-advanced %add.ptr2. |
| 85 | + %add.ptr3 = getelementptr inbounds float, ptr %pDst, i32 1 |
| 86 | + ret ptr %add.ptr3 |
| 87 | +} |
| 88 | + |
| | +; manyusesafterloop: the loop runs 64 times (%blkCnt.013 counts down from 64). |
| | +; Each iteration loads a <4 x float> from %pSrcA and from %pSrcB, adds them, |
| | +; stores the sum to %pDst, and advances all three pointers by 4 floats. |
| | +; %blockSize is never read. |
| | +; After the loop, one float is loaded through each of the incoming %pDst, %pSrcA |
| | +; and %pSrcB pointers and their sum is returned. |
| | +; In the checked output none of the loop accesses is post-incrementing: each |
| | +; address is formed as base + r3 (r0, r1 and r2 plus r3, with r3 stepped by 16), |
| | +; and the loads after the loop read directly from [r2], [r0] and [r1]. |
| 89 | +define nofpclass(nan inf) float @manyusesafterloop(ptr nocapture noundef readonly %pSrcA, ptr nocapture noundef readonly %pSrcB, ptr nocapture noundef %pDst, i32 noundef %blockSize) { |
| 90 | +; CHECK-LABEL: manyusesafterloop: |
| 91 | +; CHECK: @ %bb.0: @ %entry |
| 92 | +; CHECK-NEXT: .save {r4, lr} |
| 93 | +; CHECK-NEXT: push {r4, lr} |
| 94 | +; CHECK-NEXT: mov.w lr, #64 |
| 95 | +; CHECK-NEXT: movs r3, #0 |
| 96 | +; CHECK-NEXT: .LBB2_1: @ %while.body |
| 97 | +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| 98 | +; CHECK-NEXT: add.w r12, r0, r3 |
| 99 | +; CHECK-NEXT: adds r4, r1, r3 |
| 100 | +; CHECK-NEXT: vldrw.u32 q1, [r4] |
| 101 | +; CHECK-NEXT: vldrw.u32 q0, [r12] |
| 102 | +; CHECK-NEXT: adds r4, r2, r3 |
| 103 | +; CHECK-NEXT: adds r3, #16 |
| 104 | +; CHECK-NEXT: vadd.f32 q0, q1, q0 |
| 105 | +; CHECK-NEXT: vstrw.32 q0, [r4] |
| 106 | +; CHECK-NEXT: le lr, .LBB2_1 |
| 107 | +; CHECK-NEXT: @ %bb.2: @ %while.end |
| 108 | +; CHECK-NEXT: vldr s0, [r2] |
| 109 | +; CHECK-NEXT: vldr s2, [r0] |
| 110 | +; CHECK-NEXT: vadd.f32 s0, s2, s0 |
| 111 | +; CHECK-NEXT: vldr s2, [r1] |
| 112 | +; CHECK-NEXT: vadd.f32 s0, s0, s2 |
| 113 | +; CHECK-NEXT: vmov r0, s0 |
| 114 | +; CHECK-NEXT: pop {r4, pc} |
| 115 | +entry: |
| 116 | + br label %while.body |
| 117 | + |
| 118 | +while.body: |
| 119 | + %pSrcA.addr.016 = phi ptr [ %pSrcA, %entry ], [ %add.ptr, %while.body ] |
| 120 | + %pSrcB.addr.015 = phi ptr [ %pSrcB, %entry ], [ %add.ptr1, %while.body ] |
| 121 | + %pDst.addr.014 = phi ptr [ %pDst, %entry ], [ %add.ptr2, %while.body ] |
| 122 | + %blkCnt.013 = phi i32 [ 64, %entry ], [ %dec, %while.body ] |
| 123 | + %0 = load <4 x float>, ptr %pSrcA.addr.016, align 4 |
| 124 | + %1 = load <4 x float>, ptr %pSrcB.addr.015, align 4 |
| 125 | + %2 = fadd fast <4 x float> %1, %0 |
| 126 | + store <4 x float> %2, ptr %pDst.addr.014, align 4 |
| | +; Advance each pointer by 4 floats, i.e. one <4 x float> vector. |
| 127 | + %add.ptr = getelementptr inbounds float, ptr %pSrcA.addr.016, i32 4 |
| 128 | + %add.ptr1 = getelementptr inbounds float, ptr %pSrcB.addr.015, i32 4 |
| 129 | + %add.ptr2 = getelementptr inbounds float, ptr %pDst.addr.014, i32 4 |
| 130 | + %dec = add nsw i32 %blkCnt.013, -1 |
| 131 | + %cmp.not = icmp eq i32 %dec, 0 |
| 132 | + br i1 %cmp.not, label %while.end, label %while.body |
| 133 | + |
| 134 | +while.end: |
| | +; All three loads below go through the incoming pointer arguments, not the |
| | +; loop-advanced pointers. |
| 135 | + %3 = load float, ptr %pDst, align 4 |
| 136 | + %4 = load float, ptr %pSrcA, align 4 |
| 137 | + %add = fadd fast float %4, %3 |
| 138 | + %5 = load float, ptr %pSrcB, align 4 |
| 139 | + %add5 = fadd fast float %add, %5 |
| 140 | + ret float %add5 |
| 141 | +} |
| 142 | + |
0 commit comments