@@ -91,7 +91,7 @@ for.body: ; preds = %for.body.preheader,
91
91
br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
92
92
}
93
93
94
- define void @thirdorderrec (ptr nocapture noundef readonly %x , ptr noalias nocapture noundef writeonly %y , i32 noundef %n ) # 0 {
94
+ define void @thirdorderrec (ptr nocapture noundef readonly %x , ptr noalias nocapture noundef writeonly %y , i32 noundef %n ) {
95
95
; CHECK-LABEL: @thirdorderrec(
96
96
; CHECK-NEXT: entry:
97
97
; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 3
@@ -352,3 +352,101 @@ loop:
352
352
exit:
353
353
ret void
354
354
}
355
+
356
+ ; Loop whose %prev phi carries the previous iteration's %next pointer and is used
+ ; as a load address; the two loaded values are stored to the fixed addresses %C and %B.
+ ; The CHECK lines expect a vector loop that advances the index by 8 per iteration
+ ; and keeps the recurrence in a <4 x ptr> phi.
+ ; NOTE(review): the name suggests this covers a recurrence the vectorizer tried to
+ ; force scalar - confirm against the commit message.
+ define void @test_for_tried_to_force_scalar (ptr noalias %A , ptr noalias %B , ptr noalias %C , i64 %n ) #0 {
357
+ ; CHECK-LABEL: @test_for_tried_to_force_scalar(
358
+ ; CHECK-NEXT: entry:
359
+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1
360
+ ; CHECK-NEXT: [[CONFLICT_RDX20:%.*]] = icmp ule i64 [[TMP0]], 8
361
+ ; CHECK-NEXT: br i1 [[CONFLICT_RDX20]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
362
+ ; CHECK: vector.ph:
363
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
364
+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
365
+ ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]]
366
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP4]]
367
+ ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x ptr> poison, ptr [[A:%.*]], i32 3
368
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
369
+ ; CHECK: vector.body:
370
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
371
+ ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
372
+ ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
373
+ ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
374
+ ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
375
+ ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
376
+ ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 4
377
+ ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
378
+ ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
379
+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
380
+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP5]]
381
+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
382
+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
383
+ ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
384
+ ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP13]], i32 0
385
+ ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x ptr> [[TMP17]], ptr [[TMP14]], i32 1
386
+ ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP18]], ptr [[TMP15]], i32 2
387
+ ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP16]], i32 3
388
+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
389
+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
390
+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
391
+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
392
+ ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP21]], i32 0
393
+ ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP22]], i32 1
394
+ ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr> [[TMP26]], ptr [[TMP23]], i32 2
395
+ ; CHECK-NEXT: [[TMP28]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3
396
+ ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP20]], <4 x ptr> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
397
+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP21]], align 4
398
+ ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
399
+ ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x float> [[STRIDED_VEC]], i32 3
400
+ ; CHECK-NEXT: store float [[TMP30]], ptr [[C:%.*]], align 4
401
+ ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 0
402
+ ; CHECK-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP31]], align 4
403
+ ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 1
404
+ ; CHECK-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP33]], align 4
405
+ ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 2
406
+ ; CHECK-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP35]], align 4
407
+ ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x ptr> [[TMP29]], i32 3
408
+ ; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP37]], align 4
409
+ ; CHECK-NEXT: store float [[TMP36]], ptr [[B:%.*]], align 4
410
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
411
+ ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
412
+ ; CHECK-NEXT: br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
413
+ ; CHECK: middle.block:
414
+ ; CHECK-NEXT: br label [[SCALAR_PH]]
415
+ ; CHECK: scalar.ph:
416
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
417
+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
418
+ ; CHECK-NEXT: br label [[LOOP:%.*]]
419
+ ; CHECK: loop:
420
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
421
+ ; CHECK-NEXT: [[PREV:%.*]] = phi ptr [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[NEXT:%.*]], [[LOOP]] ]
422
+ ; CHECK-NEXT: [[NEXT]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[IV]]
423
+ ; CHECK-NEXT: [[TMP40:%.*]] = load float, ptr [[NEXT]], align 4
424
+ ; CHECK-NEXT: store float [[TMP40]], ptr [[C]], align 4
425
+ ; CHECK-NEXT: [[TMP41:%.*]] = load float, ptr [[PREV]], align 4
426
+ ; CHECK-NEXT: store float [[TMP41]], ptr [[B]], align 4
427
+ ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
428
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
429
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
430
+ ; CHECK: exit:
431
+ ; CHECK-NEXT: ret void
432
+ ;
433
+ entry:
434
+ br label %loop
435
+
436
+ loop:
437
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ] ; induction variable, starts at 0
438
+ %prev = phi ptr [ %A , %entry ], [ %next , %loop ] ; %A on the first iteration, afterwards the previous iteration's %next
439
+ %next = getelementptr nusw [3 x float ], ptr %A , i64 %iv ; address of the %iv-th [3 x float] element starting at %A
440
+ %0 = load float , ptr %next , align 4
441
+ store float %0 , ptr %C , align 4 ; %C is a function argument, so the same address is written every iteration
442
+ %1 = load float , ptr %prev , align 4 ; load through the pointer carried over from the previous iteration
443
+ store float %1 , ptr %B , align 4 ; %B is a function argument, so the same address is written every iteration
444
+ %iv.next = add nsw i64 %iv , 1
445
+ %exitcond.not = icmp eq i64 %iv , %n ; exit test uses %iv, not %iv.next; the CHECK lines above work from [[N]] + 1
446
+ br i1 %exitcond.not , label %exit , label %loop
447
+
448
+ exit:
449
+ ret void
450
+ }
451
+
452
+ ; Attribute group #0 sets the target CPU to znver3; @test_for_tried_to_force_scalar is declared with #0.
+ attributes #0 = { "target-cpu" ="znver3" }
0 commit comments