Skip to content

Commit 6fe21bc

Browse files
committed
[SLP] Fix PR79229: Do not erase an extractelement if it is used in a
multi-register node. If the node can span several registers and the same extractelement instruction is used in several parts, it may be necessary to keep that extractelement instruction to avoid a compiler crash.
1 parent c3f7fb1 commit 6fe21bc

File tree

2 files changed

+109
-1
lines changed

2 files changed

+109
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10217,7 +10217,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1021710217
UniqueBases.insert(VecBase);
1021810218
// If the only one use is vectorized - can delete the extractelement
1021910219
// itself.
10220-
if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) {
10220+
if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) ||
10221+
any_of(EI->users(), [&](User *U) {
1022110222
const TreeEntry *UTE = R.getTreeEntry(U);
1022210223
return !UTE || R.MultiNodeScalars.contains(U) ||
1022310224
count_if(R.VectorizableTree,
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 < %s | FileCheck %s
3+
4+
define void @test(double %i) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: bb:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0
9+
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> zeroinitializer, [[TMP0]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[I]], i32 1
11+
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
12+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
14+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP3]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 3>
15+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
16+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 poison>
17+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 5, i32 6, i32 poison>
18+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP4]], i32 7
19+
; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
20+
; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
21+
; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
22+
; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
23+
; CHECK-NEXT: br label [[BB116:%.*]]
24+
; CHECK: bb116:
25+
; CHECK-NEXT: [[TMP15:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]]
26+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[TMP15]], i32 0
27+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[TMP15]], i32 1
28+
; CHECK-NEXT: [[I120:%.*]] = fadd double [[TMP16]], [[TMP17]]
29+
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
30+
; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP3]]
31+
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
32+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[TMP18]], i32 1
33+
; CHECK-NEXT: [[I128:%.*]] = fadd double [[TMP20]], [[TMP21]]
34+
; CHECK-NEXT: [[I139:%.*]] = call double @llvm.maxnum.f64(double [[I128]], double 0.000000e+00)
35+
; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP19]], zeroinitializer
36+
; CHECK-NEXT: [[TMP23:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP22]], <2 x double> zeroinitializer)
37+
; CHECK-NEXT: [[TMP24:%.*]] = fmul <2 x double> [[TMP23]], zeroinitializer
38+
; CHECK-NEXT: [[TMP25:%.*]] = fptosi <2 x double> [[TMP24]] to <2 x i32>
39+
; CHECK-NEXT: [[TMP26:%.*]] = sub <2 x i32> zeroinitializer, [[TMP25]]
40+
; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt <2 x i32> [[TMP26]], zeroinitializer
41+
; CHECK-NEXT: [[I147:%.*]] = fcmp ogt double [[I120]], 0.000000e+00
42+
; CHECK-NEXT: ret void
43+
;
44+
bb:
45+
%i74 = fsub double 0.000000e+00, poison
46+
%i75 = fsub double 0.000000e+00, %i
47+
%i76 = fmul double 0.000000e+00, %i75
48+
%i77 = fadd double %i76, 0.000000e+00
49+
%i78 = fadd double %i77, 0.000000e+00
50+
%i79 = fcmp ult double %i78, 0.000000e+00
51+
%i81 = fsub double %i, 0.000000e+00
52+
%i82 = fmul double 0.000000e+00, %i81
53+
%i83 = fadd double 0.000000e+00, %i82
54+
%i84 = fadd double %i83, 0.000000e+00
55+
%i85 = fcmp ult double %i84, 0.000000e+00
56+
%i86 = fsub double 0.000000e+00, %i
57+
%i87 = fmul double 0.000000e+00, %i86
58+
%i88 = fadd double %i87, 0.000000e+00
59+
%i89 = fadd double %i88, 0.000000e+00
60+
%i90 = fcmp ult double %i89, 0.000000e+00
61+
%i91 = fsub double 0.000000e+00, 0.000000e+00
62+
%i92 = fmul double 0.000000e+00, 0.000000e+00
63+
%i93 = fadd double %i92, 0.000000e+00
64+
%i94 = fadd double %i93, 0.000000e+00
65+
%i95 = fcmp ult double %i94, 0.000000e+00
66+
%i96 = fsub double poison, 0.000000e+00
67+
%i97 = fadd double %i77, 0.000000e+00
68+
%i98 = fcmp ult double %i97, 0.000000e+00
69+
%i99 = fadd double %i83, 0.000000e+00
70+
%i100 = fcmp ult double %i99, 0.000000e+00
71+
%i101 = fmul double 0.000000e+00, 0.000000e+00
72+
%i102 = fadd double %i101, 0.000000e+00
73+
%i103 = fadd double %i102, 0.000000e+00
74+
%i104 = fcmp ult double %i103, 0.000000e+00
75+
%i105 = fmul double 0.000000e+00, 0.000000e+00
76+
%i106 = fadd double %i105, 0.000000e+00
77+
%i107 = fadd double %i106, 0.000000e+00
78+
%i108 = fcmp ult double %i107, 0.000000e+00
79+
br label %bb116
80+
81+
bb116:
82+
%i117 = fmul double 0.000000e+00, %i81
83+
%i119 = fmul double 0.000000e+00, %i96
84+
%i120 = fadd double %i117, %i119
85+
%i121 = fmul double 0.000000e+00, %i74
86+
%i122 = fmul double 0.000000e+00, %i75
87+
%i123 = fadd double %i122, 0.000000e+00
88+
%i124 = fmul double 0.000000e+00, %i91
89+
%i125 = fadd double %i124, 0.000000e+00
90+
%i127 = fmul double 0.000000e+00, %i86
91+
%i128 = fadd double %i127, %i121
92+
%i133 = call double @llvm.maxnum.f64(double %i123, double 0.000000e+00)
93+
%i134 = fmul double %i133, 0.000000e+00
94+
%i135 = fptosi double %i134 to i32
95+
%i136 = sub i32 0, %i135
96+
%i137 = icmp sgt i32 %i136, 0
97+
%i139 = call double @llvm.maxnum.f64(double %i128, double 0.000000e+00)
98+
%i142 = call double @llvm.maxnum.f64(double %i125, double 0.000000e+00)
99+
%i143 = fmul double %i142, 0.000000e+00
100+
%i144 = fptosi double %i143 to i32
101+
%i145 = sub i32 0, %i144
102+
%i146 = icmp sgt i32 %i145, 0
103+
%i147 = fcmp ogt double %i120, 0.000000e+00
104+
ret void
105+
}
106+
107+
declare double @llvm.maxnum.f64(double, double)

0 commit comments

Comments
 (0)