Skip to content

Commit 48bbd76

Browse files
committed
[SLP] Fix PR79229: Check that extractelement is used only in a single node
before erasing. Before erasing an extractelement instruction, it is not enough to check that it has a single use; we also need to check that it is not used in several nodes, which can happen because of the preliminary node reordering.
1 parent bb65f5a commit 48bbd76

File tree

2 files changed

+154
-1
lines changed

2 files changed

+154
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10218,7 +10218,16 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1021810218
// If the only one use is vectorized - can delete the extractelement
1021910219
// itself.
1022010220
if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) {
10221-
return !R.ScalarToTreeEntry.count(U);
10221+
const TreeEntry *UTE = R.getTreeEntry(U);
10222+
return !UTE || R.MultiNodeScalars.contains(U) ||
10223+
count_if(R.VectorizableTree,
10224+
[&](const std::unique_ptr<TreeEntry> &TE) {
10225+
return any_of(TE->UserTreeIndices,
10226+
[&](const EdgeInfo &Edge) {
10227+
return Edge.UserTE == UTE;
10228+
}) &&
10229+
is_contained(TE->Scalars, EI);
10230+
}) != 1;
1022210231
}))
1022310232
continue;
1022410233
R.eraseInstruction(EI);
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 -S < %s | FileCheck %s
3+
4+
define void @foo(double %i) {
5+
; CHECK-LABEL: define void @foo(
6+
; CHECK-SAME: double [[I:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: bb:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double 0.000000e+00>, double [[I]], i32 2
9+
; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> zeroinitializer, [[TMP0]]
10+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
11+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[I]], i32 0
12+
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]]
13+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
14+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> <i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
15+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> <double 0.000000e+00, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x i32> <i32 8, i32 poison, i32 2, i32 poison, i32 poison, i32 13, i32 14, i32 15>
16+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> [[TMP7]], double [[TMP2]], i32 3
17+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 1>
18+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> <double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 5, i32 poison, i32 7>
19+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP5]], i32 6
20+
; CHECK-NEXT: [[TMP12:%.*]] = fmul <8 x double> [[TMP8]], [[TMP11]]
21+
; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> zeroinitializer, [[TMP12]]
22+
; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[TMP13]], zeroinitializer
23+
; CHECK-NEXT: [[TMP15:%.*]] = fcmp ult <8 x double> [[TMP14]], zeroinitializer
24+
; CHECK-NEXT: [[TMP16:%.*]] = freeze <8 x i1> [[TMP15]]
25+
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP16]])
26+
; CHECK-NEXT: br i1 [[TMP17]], label [[BB58:%.*]], label [[BB115:%.*]]
27+
; CHECK: bb115:
28+
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> zeroinitializer, [[TMP4]]
29+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
30+
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[TMP18]], i32 1
31+
; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]]
32+
; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]]
33+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
34+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, <4 x double> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
35+
; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]]
36+
; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer
37+
; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]]
38+
; CHECK-NEXT: [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer
39+
; CHECK-NEXT: [[TMP28:%.*]] = fmul <4 x double> [[TMP27]], zeroinitializer
40+
; CHECK-NEXT: [[TMP29:%.*]] = fptosi <4 x double> [[TMP28]] to <4 x i32>
41+
; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i32> zeroinitializer, [[TMP29]]
42+
; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP30]])
43+
; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 [[TMP31]], 32000
44+
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP31]], i32 32000
45+
; CHECK-NEXT: [[I163:%.*]] = fcmp ogt double [[I118]], 0.000000e+00
46+
; CHECK-NEXT: [[I164:%.*]] = icmp slt i32 0, [[OP_RDX1]]
47+
; CHECK-NEXT: unreachable
48+
; CHECK: bb58:
49+
; CHECK-NEXT: ret void
50+
;
51+
bb:
52+
%i75 = fsub double 0.000000e+00, 0.000000e+00
53+
%i76 = fsub double 0.000000e+00, 0.000000e+00
54+
%i77 = fmul double 0.000000e+00, %i75
55+
%i78 = fmul double 0.000000e+00, %i76
56+
%i79 = fadd double %i78, 0.000000e+00
57+
%i80 = fadd double %i79, 0.000000e+00
58+
%i81 = fcmp ult double %i80, 0.000000e+00
59+
%i82 = fsub double 0.000000e+00, poison
60+
%i83 = fmul double 0.000000e+00, %i82
61+
%i84 = fadd double 0.000000e+00, %i83
62+
%i85 = fadd double %i84, 0.000000e+00
63+
%i86 = fcmp ult double %i85, 0.000000e+00
64+
%i87 = fsub double 0.000000e+00, %i
65+
%i88 = fadd double 0.000000e+00, %i77
66+
%i89 = fadd double %i88, 0.000000e+00
67+
%i90 = fcmp ult double %i89, 0.000000e+00
68+
%i91 = fsub double 0.000000e+00, 0.000000e+00
69+
%i92 = fmul double poison, 0.000000e+00
70+
%i93 = fadd double %i92, 0.000000e+00
71+
%i94 = fadd double %i93, 0.000000e+00
72+
%i95 = fcmp ult double %i94, 0.000000e+00
73+
%i96 = fadd double %i79, 0.000000e+00
74+
%i97 = fcmp ult double %i96, 0.000000e+00
75+
%i98 = fadd double %i84, 0.000000e+00
76+
%i99 = fcmp ult double %i98, 0.000000e+00
77+
%i100 = fadd double 0.000000e+00, %i77
78+
%i101 = fadd double %i100, 0.000000e+00
79+
%i102 = fcmp ult double %i101, 0.000000e+00
80+
%i103 = fsub double 0.000000e+00, %i
81+
%i104 = fmul double poison, 0.000000e+00
82+
%i105 = fadd double %i104, 0.000000e+00
83+
%i106 = fadd double %i105, 0.000000e+00
84+
%i107 = fcmp ult double %i106, 0.000000e+00
85+
%i108 = select i1 %i107, i1 %i102, i1 false
86+
%i109 = select i1 %i108, i1 %i99, i1 false
87+
%i110 = select i1 %i109, i1 %i97, i1 false
88+
%i111 = select i1 %i110, i1 %i95, i1 false
89+
%i112 = select i1 %i111, i1 %i90, i1 false
90+
%i113 = select i1 %i112, i1 %i86, i1 false
91+
%i114 = select i1 %i113, i1 %i81, i1 false
92+
br i1 %i114, label %bb58, label %bb115
93+
94+
bb115:
95+
%i116 = fmul double 0.000000e+00, %i103
96+
%i117 = fmul double 0.000000e+00, %i82
97+
%i118 = fadd double %i116, %i117
98+
%i120 = fmul double 0.000000e+00, %i75
99+
%i121 = fmul double 0.000000e+00, %i76
100+
%i122 = fadd double %i121, 0.000000e+00
101+
%i123 = fadd double 0.000000e+00, %i120
102+
%i124 = fmul double 0.000000e+00, %i91
103+
%i125 = fadd double %i124, %i82
104+
%i126 = fadd double %i125, 0.000000e+00
105+
%i127 = fmul double 0.000000e+00, %i87
106+
%i128 = fadd double %i127, 0.000000e+00
107+
%i129 = fadd double %i128, 0.000000e+00
108+
%i130 = fadd double %i122, 0.000000e+00
109+
%i131 = fadd double %i123, 0.000000e+00
110+
%i132 = select i1 false, double 0.000000e+00, double %i131
111+
%i133 = fmul double %i132, 0.000000e+00
112+
%i134 = fmul double %i133, 0.000000e+00
113+
%i135 = fptosi double %i134 to i32
114+
%i136 = or i32 0, %i135
115+
%i137 = icmp slt i32 %i136, 32000
116+
%i138 = select i1 %i137, i32 %i136, i32 32000
117+
%i139 = select i1 false, double 0.000000e+00, double %i130
118+
%i140 = fmul double %i139, 0.000000e+00
119+
%i141 = fmul double %i140, 0.000000e+00
120+
%i142 = fptosi double %i141 to i32
121+
%i143 = or i32 0, %i142
122+
%i144 = icmp slt i32 %i143, %i138
123+
%i145 = select i1 %i144, i32 %i143, i32 %i138
124+
%i146 = select i1 false, double 0.000000e+00, double %i129
125+
%i147 = fmul double %i146, 0.000000e+00
126+
%i148 = fmul double %i147, 0.000000e+00
127+
%i149 = fptosi double %i148 to i32
128+
%i150 = or i32 0, %i149
129+
%i151 = icmp slt i32 %i150, %i145
130+
%i152 = select i1 %i151, i32 %i150, i32 %i145
131+
%i153 = select i1 false, double 0.000000e+00, double %i126
132+
%i154 = fmul double %i153, 0.000000e+00
133+
%i155 = fmul double %i154, 0.000000e+00
134+
%i156 = fptosi double %i155 to i32
135+
%i157 = or i32 0, %i156
136+
%i158 = icmp slt i32 %i157, %i152
137+
%i159 = select i1 %i158, i32 %i157, i32 %i152
138+
%i163 = fcmp ogt double %i118, 0.000000e+00
139+
%i164 = icmp slt i32 0, %i159
140+
unreachable
141+
142+
bb58:
143+
ret void
144+
}

0 commit comments

Comments
 (0)