Skip to content

Commit 19aec00

Browse files
committed
[SLP]Initial support for (masked)loads + compress and (masked)interleaved
Added initial support for (masked)loads + compress and (masked)interleaved loads. Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: #132099
1 parent cd2f85a commit 19aec00

15 files changed

+596
-351
lines changed

Diff for: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+353-25
Large diffs are not rendered by default.

Diff for: llvm/test/Transforms/SLPVectorizer/X86/entries-shuffled-diff-sizes.ll

+7-10
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,16 @@ define void @test() {
1515
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc ninf nsz arcp contract afn float [[GEPLOAD1612]], [[TMP1]]
1616
; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc ninf nsz arcp contract afn <16 x float> [[TMP4]], [[TMP0]]
1717
; CHECK-NEXT: store <16 x float> [[TMP6]], ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 2928), align 16
18-
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1272), align 16
19-
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1288), align 16
20-
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1296), align 16
21-
; CHECK-NEXT: [[TMP13:%.*]] = load <8 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1304), align 16
18+
; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1272), align 16
2219
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 1620), align 4
23-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP8]], <16 x i32> <i32 poison, i32 0, i32 2, i32 1, i32 0, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
20+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
21+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP7]], <16 x i32> <i32 poison, i32 0, i32 20, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2422
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
25-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP12]], <16 x i32> <i32 1, i32 1, i32 17, i32 17, i32 18, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 19, i32 19, i32 19, i32 19>
26-
; CHECK-NEXT: [[TMP15:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP12]], <8 x float> [[TMP13]], i64 8)
27-
; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP15]], <4 x float> [[TMP7]], i64 0)
28-
; CHECK-NEXT: [[TMP17:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP16]], <2 x float> [[TMP9]], i64 6)
23+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP10]], <16 x i32> <i32 1, i32 1, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 19, i32 19, i32 19, i32 19, i32 18>
24+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18>
2925
; CHECK-NEXT: [[TMP18:%.*]] = fmul reassoc ninf nsz arcp contract afn <16 x float> [[TMP14]], [[TMP17]]
30-
; CHECK-NEXT: store <16 x float> [[TMP18]], ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 2992), align 16
26+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x float> [[TMP18]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 15, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
27+
; CHECK-NEXT: store <16 x float> [[TMP15]], ptr getelementptr ([16000 x i8], ptr @GLOB, i64 0, i64 2992), align 16
3128
; CHECK-NEXT: ret void
3229
;
3330
alloca_0:

Diff for: llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll

+6-16
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,9 @@ define void @test() {
99
; CHECK-NEXT: [[COND_IN_V:%.*]] = select i1 false, ptr null, ptr null
1010
; CHECK-NEXT: br label [[BB:%.*]]
1111
; CHECK: bb:
12-
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[COND_IN_V]], align 8
13-
; CHECK-NEXT: [[BV:%.*]] = icmp eq i64 [[V]], 0
14-
; CHECK-NEXT: [[IN_1:%.*]] = getelementptr i64, ptr [[COND_IN_V]], i64 4
15-
; CHECK-NEXT: [[V_1:%.*]] = load i64, ptr [[IN_1]], align 8
16-
; CHECK-NEXT: [[BV_1:%.*]] = icmp eq i64 [[V_1]], 0
17-
; CHECK-NEXT: [[IN_2:%.*]] = getelementptr i64, ptr [[COND_IN_V]], i64 8
18-
; CHECK-NEXT: [[V_2:%.*]] = load i64, ptr [[IN_2]], align 8
19-
; CHECK-NEXT: [[BV_2:%.*]] = icmp eq i64 [[V_2]], 0
20-
; CHECK-NEXT: [[IN_3:%.*]] = getelementptr i64, ptr [[COND_IN_V]], i64 12
21-
; CHECK-NEXT: [[V_3:%.*]] = load i64, ptr [[IN_3]], align 8
22-
; CHECK-NEXT: [[BV_3:%.*]] = icmp eq i64 [[V_3]], 0
12+
; CHECK-NEXT: [[TMP0:%.*]] = call <13 x i64> @llvm.masked.load.v13i64.p0(ptr [[COND_IN_V]], i32 8, <13 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <13 x i64> poison)
13+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <13 x i64> [[TMP0]], <13 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
14+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
2315
; CHECK-NEXT: ret void
2416
;
2517
; CHECK-SLP-THRESHOLD-LABEL: define void @test
@@ -28,11 +20,9 @@ define void @test() {
2820
; CHECK-SLP-THRESHOLD-NEXT: [[COND_IN_V:%.*]] = select i1 false, ptr null, ptr null
2921
; CHECK-SLP-THRESHOLD-NEXT: br label [[BB:%.*]]
3022
; CHECK-SLP-THRESHOLD: bb:
31-
; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0
32-
; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer
33-
; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> <i64 12, i64 8, i64 4, i64 0>
34-
; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> splat (i1 true), <4 x i64> poison)
35-
; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer
23+
; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = call <13 x i64> @llvm.masked.load.v13i64.p0(ptr [[COND_IN_V]], i32 8, <13 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <13 x i64> poison)
24+
; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <13 x i64> [[TMP0]], <13 x i64> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
25+
; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
3626
; CHECK-SLP-THRESHOLD-NEXT: ret void
3727
;
3828
entry:

Diff for: llvm/test/Transforms/SLPVectorizer/X86/pr47623.ll

+6-10
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,16 @@ define void @foo() {
2424
; SSE-NEXT: ret void
2525
;
2626
; AVX-LABEL: @foo(
27-
; AVX-NEXT: [[TMP1:%.*]] = load i32, ptr @b, align 16
28-
; AVX-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @b, i64 8), align 8
29-
; AVX-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
30-
; AVX-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1
31-
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
27+
; AVX-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr @b, align 16
28+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
29+
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
3230
; AVX-NEXT: store <8 x i32> [[TMP5]], ptr @a, align 16
3331
; AVX-NEXT: ret void
3432
;
3533
; AVX512-LABEL: @foo(
36-
; AVX512-NEXT: [[TMP1:%.*]] = load i32, ptr @b, align 16
37-
; AVX512-NEXT: [[TMP2:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @b, i64 8), align 8
38-
; AVX512-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
39-
; AVX512-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i64 1
40-
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
34+
; AVX512-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr @b, align 16
35+
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
36+
; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
4137
; AVX512-NEXT: store <8 x i32> [[TMP5]], ptr @a, align 16
4238
; AVX512-NEXT: ret void
4339
;

0 commit comments

Comments
 (0)