@@ -209,3 +209,48 @@ loop:
209
209
call void @blackhole (<2 x i8* > %e6 )
210
210
br label %loop
211
211
}
212
+
213
+ ; Avoid folding the GEP computed outside the loop (%add.ptr, in the entry block) into the GEP inside the loop, since that increases the loop
214
+ ; instruction count. NOTE(review): the expectations recorded below place the index add and the GEP on %_arg_3 inside for.body.i -- presumably a baseline of current behavior; confirm.
215
+ define float @gep_cross_loop (i64* %_arg_ , float * %_arg_3 , float %_arg_8 ) ; %_arg_8 is not referenced anywhere in the body
216
+ ; CHECK-LABEL: @gep_cross_loop(
217
+ ; CHECK-NEXT: entry:
218
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[_ARG_:%.*]], align 8
219
+ ; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
220
+ ; CHECK: for.cond.i:
221
+ ; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD11_I:%.*]], [[FOR_BODY_I:%.*]] ]
222
+ ; CHECK-NEXT: [[SUM:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I:%.*]], [[FOR_BODY_I]] ]
223
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 17
224
+ ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_I]], label [[FOR_COND_I_I_I_PREHEADER:%.*]]
225
+ ; CHECK: for.cond.i.i.i.preheader:
226
+ ; CHECK-NEXT: ret float [[SUM]]
227
+ ; CHECK: for.body.i:
228
+ ; CHECK-NEXT: [[ARRAYIDX_I84_I_IDX:%.*]] = add nsw i64 [[IDX]], [[TMP0]]
229
+ ; CHECK-NEXT: [[ARRAYIDX_I84_I:%.*]] = getelementptr inbounds float, float* [[_ARG_3:%.*]], i64 [[ARRAYIDX_I84_I_IDX]]
230
+ ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_I84_I]], align 4
231
+ ; CHECK-NEXT: [[ADD_I]] = fadd fast float [[SUM]], [[TMP1]]
232
+ ; CHECK-NEXT: [[ADD11_I]] = add nuw nsw i64 [[IDX]], 1
233
+ ; CHECK-NEXT: br label [[FOR_COND_I]]
234
+ ;
235
+ {
236
+ entry:
237
+ %0 = load i64 , i64* %_arg_ , align 8 ; element offset, loaded once before the loop
238
+ %add.ptr = getelementptr inbounds float , float * %_arg_3 , i64 %0 ; loop-invariant pointer: %_arg_3 advanced by %0 floats (the GEP outside the loop)
239
+ br label %for.cond.i
240
+
241
+ for.cond.i: ; preds = %for.body.i, %entry
242
+ %idx = phi i64 [ 0 , %entry ], [ %add11.i , %for.body.i ] ; induction variable: 0 on entry, %idx + 1 on the back edge
243
+ %sum = phi float [ 0 .000000e+00 , %entry ], [ %add.i , %for.body.i ] ; running total; NOTE(review): the literal appears split by a stray space ("0 .000000e+00") -- verify against the original file
244
+ %cmp = icmp ule i64 %idx , 16 ; unsigned bound: the body runs for %idx = 0..16
245
+ br i1 %cmp , label %for.body.i , label %for.cond.i.i.i.preheader ; leave the loop once %idx exceeds 16
246
+
247
+ for.cond.i.i.i.preheader: ; preds = %for.cond.i
248
+ ret float %sum ; the accumulated total is the function result
249
+
250
+ for.body.i: ; preds = %for.cond.i
251
+ %arrayidx.i84.i = getelementptr inbounds float , float * %add.ptr , i64 %idx ; the GEP inside the loop: element %idx relative to %add.ptr
252
+ %1 = load float , float * %arrayidx.i84.i , align 4
253
+ %add.i = fadd fast float %sum , %1 ; accumulate the loaded element into the running total
254
+ %add11.i = add nsw i64 %idx , 1 ; advance the induction variable
255
+ br label %for.cond.i ; back edge to the loop header
256
+ }
0 commit comments