Skip to content

Commit a113a58

Browse files
[NFCI] Regenerate LoopVectorize test checks
1 parent d3684c3 commit a113a58

File tree

211 files changed

+30890
-6209
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

211 files changed

+30890
-6209
lines changed

llvm/test/Transforms/LoopVectorize/2012-10-20-infloop.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce
23

34
; Check that we don't fall into an infinite loop.
45
define void @test() nounwind {
56
entry:
6-
br label %for.body
7+
br label %for.body
78

89
for.body:
9-
%0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
10-
br label %for.body
10+
%0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
11+
br label %for.body
1112
}
1213

1314

1415

1516
define void @test2() nounwind {
1617
entry:
17-
br label %for.body
18+
br label %for.body
1819

1920
for.body: ; preds = %for.body, %entry
20-
%indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ]
21-
%0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
22-
%indvars.iv.next48 = add i64 %indvars.iv47, 1
23-
br i1 undef, label %for.end, label %for.body
21+
%indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ]
22+
%0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
23+
%indvars.iv.next48 = add i64 %indvars.iv47, 1
24+
br i1 undef, label %for.end, label %for.body
2425

2526
for.end: ; preds = %for.body
26-
unreachable
27+
unreachable
2728
}
2829

2930
;PR14701

llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4
23

34
; Check that we don't crash.
45

llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; RUN: opt -Oz -S -enable-new-pm=0 < %s | FileCheck %s
23
; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s
34

@@ -10,7 +11,57 @@ target triple = "arm64-apple-ios5.0.0"
1011

1112
define void @foo(float* noalias nocapture %ptrA, float* noalias nocapture readonly %ptrB, i64 %size) {
1213
; CHECK-LABEL: @foo(
13-
; CHECK: fmul <4 x float>
14+
; CHECK-NEXT: entry:
15+
; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[SIZE:%.*]], 0
16+
; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
17+
; CHECK: for.body.preheader:
18+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SIZE]], 8
19+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER6:%.*]], label [[VECTOR_PH:%.*]]
20+
; CHECK: vector.ph:
21+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SIZE]], -8
22+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
23+
; CHECK: vector.body:
24+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[PTRB:%.*]], i64 [[INDEX]]
26+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
27+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
28+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 4
29+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
30+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
31+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[PTRA:%.*]], i64 [[INDEX]]
32+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
33+
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4
34+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4
35+
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
36+
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4
37+
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]]
38+
; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD5]]
39+
; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
40+
; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP10]], align 4
41+
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
42+
; CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[TMP11]], align 4
43+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
44+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
45+
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
46+
; CHECK: middle.block:
47+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[SIZE]]
48+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER6]]
49+
; CHECK: for.body.preheader6:
50+
; CHECK-NEXT: [[INDVARS_IV2_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
51+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
52+
; CHECK: for.body:
53+
; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV2_PH]], [[FOR_BODY_PREHEADER6]] ]
54+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTRB]], i64 [[INDVARS_IV2]]
55+
; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4
56+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[PTRA]], i64 [[INDVARS_IV2]]
57+
; CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX2]], align 4
58+
; CHECK-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP14]]
59+
; CHECK-NEXT: store float [[MUL3]], float* [[ARRAYIDX2]], align 4
60+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV2]], 1
61+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[SIZE]]
62+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
63+
; CHECK: for.cond.cleanup:
64+
; CHECK-NEXT: ret void
1465
;
1566
entry:
1667
br label %for.cond

llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; RUN: opt < %s -loop-vectorize -mtriple=aarch64-none-linux-gnu -mattr=+neon -S | FileCheck %s
23
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
34

45
; Function Attrs: nounwind
56
define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
6-
;CHECK-LABEL: array_add
7-
;CHECK: load <4 x i32>
8-
;CHECK: load <4 x i32>
9-
;CHECK: load <4 x i32>
10-
;CHECK: load <4 x i32>
11-
;CHECK: add nsw <4 x i32>
12-
;CHECK: add nsw <4 x i32>
13-
;CHECK: store <4 x i32>
14-
;CHECK: store <4 x i32>
15-
;CHECK: ret
7+
; CHECK-LABEL: @array_add(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
10+
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
11+
; CHECK: for.body.preheader:
12+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1
13+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
14+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
15+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
16+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17+
; CHECK: vector.ph:
18+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
19+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
20+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
21+
; CHECK: vector.body:
22+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
23+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
24+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
25+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
26+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
27+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
28+
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
29+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
30+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
31+
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
32+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
33+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
34+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
35+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
36+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
37+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
38+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4
39+
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
40+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4
41+
; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
42+
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD1]]
43+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP3]]
44+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
45+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
46+
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
47+
; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4
48+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
49+
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
50+
; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4
51+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
52+
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
53+
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
54+
; CHECK: middle.block:
55+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
56+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
57+
; CHECK: scalar.ph:
58+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
59+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
60+
; CHECK: for.body:
61+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
62+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
63+
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
64+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
65+
; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
66+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP26]]
67+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
68+
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4
69+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
70+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
71+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]]
72+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
73+
; CHECK: for.end.loopexit:
74+
; CHECK-NEXT: br label [[FOR_END]]
75+
; CHECK: for.end:
76+
; CHECK-NEXT: ret i32* [[C]]
77+
;
1678
entry:
1779
%cmp10 = icmp sgt i32 %size, 0
1880
br i1 %cmp10, label %for.body.preheader, label %for.end

llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; RUN: opt < %s -loop-vectorize -mtriple=arm64-none-linux-gnu -mattr=+neon -S | FileCheck %s
23
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
34

45
; Function Attrs: nounwind
56
define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
6-
;CHECK-LABEL: array_add
7-
;CHECK: load <4 x i32>
8-
;CHECK: load <4 x i32>
9-
;CHECK: load <4 x i32>
10-
;CHECK: load <4 x i32>
11-
;CHECK: add nsw <4 x i32>
12-
;CHECK: add nsw <4 x i32>
13-
;CHECK: store <4 x i32>
14-
;CHECK: store <4 x i32>
15-
;CHECK: ret
7+
; CHECK-LABEL: @array_add(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
10+
; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
11+
; CHECK: for.body.preheader:
12+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1
13+
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
14+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
15+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
16+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17+
; CHECK: vector.ph:
18+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
19+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
20+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
21+
; CHECK: vector.body:
22+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
23+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
24+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
25+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
26+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
27+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
28+
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
29+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
30+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
31+
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
32+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
33+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
34+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
35+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
36+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
37+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
38+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4
39+
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
40+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4
41+
; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD2]], [[WIDE_LOAD]]
42+
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD1]]
43+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP3]]
44+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
45+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
46+
; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
47+
; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4
48+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
49+
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
50+
; CHECK-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4
51+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
52+
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
53+
; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
54+
; CHECK: middle.block:
55+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
56+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
57+
; CHECK: scalar.ph:
58+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
59+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
60+
; CHECK: for.body:
61+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
62+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
63+
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
64+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
65+
; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
66+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP26]]
67+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
68+
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4
69+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
70+
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
71+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]]
72+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
73+
; CHECK: for.end.loopexit:
74+
; CHECK-NEXT: br label [[FOR_END]]
75+
; CHECK: for.end:
76+
; CHECK-NEXT: ret i32* [[C]]
77+
;
1678
entry:
1779
%cmp10 = icmp sgt i32 %size, 0
1880
br i1 %cmp10, label %for.body.preheader, label %for.end

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
12
; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
23

34
; This test currently fails when the LV calculates a maximum safe
@@ -15,6 +16,47 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
1516

1617

1718
define void @f1(i32* %A) #0 {
19+
; CHECK-LABEL: @f1(
20+
; CHECK-NEXT: entry:
21+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
22+
; CHECK: vector.ph:
23+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
24+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
25+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
26+
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
27+
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
28+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
29+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
30+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
31+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
32+
; CHECK: vector.body:
33+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
34+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
35+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP5]], i64 1024)
36+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP5]]
37+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
38+
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <vscale x 4 x i32>*
39+
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP8]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
40+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
41+
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
42+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
43+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
44+
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
45+
; CHECK: middle.block:
46+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
47+
; CHECK: scalar.ph:
48+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
49+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
50+
; CHECK: for.body:
51+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
52+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]]
53+
; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4
54+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
55+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
56+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
57+
; CHECK: exit:
58+
; CHECK-NEXT: ret void
59+
;
1860
entry:
1961
br label %for.body
2062

0 commit comments

Comments
 (0)