@@ -326,34 +326,40 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
326
326
; CHECK: vector.body:
327
327
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
328
328
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
329
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16
329
330
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
330
- ; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
331
- ; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
332
- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
333
- ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP6]], align 2
334
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
335
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
336
- ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
331
+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
332
+ ; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
333
+ ; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i16>
334
+ ; CHECK-NEXT: [[TMP7:%.*]] = mul <16 x i16> [[TMP2]], [[TMP5]]
335
+ ; CHECK-NEXT: [[TMP8:%.*]] = mul <16 x i16> [[TMP2]], [[TMP6]]
336
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
337
+ ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i64 16
338
+ ; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP9]], align 2
339
+ ; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP10]], align 2
340
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
341
+ ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
342
+ ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
337
343
; CHECK: middle.block:
338
344
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
339
345
; CHECK: vec.epilog.iter.check:
340
346
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
341
347
; CHECK: vec.epilog.ph:
342
- ; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[INVAR_DIV]] to i16
343
- ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i16> poison, i16 [[TMP8]], i64 0
344
- ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> poison, <4 x i32> zeroinitializer
348
+ ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[INVAR_DIV]] to i16
349
+ ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i64 0
350
+ ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[TMP13]], <4 x i16> poison, <4 x i32> zeroinitializer
345
351
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
346
352
; CHECK: vec.epilog.vector.body:
347
- ; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ 96, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
348
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX3]]
349
- ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
350
- ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[WIDE_LOAD4]] to <4 x i16>
351
- ; CHECK-NEXT: [[TMP13:%.*]] = mul <4 x i16> [[TMP10]], [[TMP12]]
352
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX3]]
353
- ; CHECK-NEXT: store <4 x i16> [[TMP13]], ptr [[TMP14]], align 2
354
- ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 4
355
- ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 100
356
- ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
353
+ ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ 96, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
354
+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX4]]
355
+ ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1
356
+ ; CHECK-NEXT: [[TMP16:%.*]] = zext <4 x i8> [[WIDE_LOAD5]] to <4 x i16>
357
+ ; CHECK-NEXT: [[TMP17:%.*]] = mul <4 x i16> [[TMP14]], [[TMP16]]
358
+ ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX4]]
359
+ ; CHECK-NEXT: store <4 x i16> [[TMP17]], ptr [[TMP18]], align 2
360
+ ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
361
+ ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 100
362
+ ; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
357
363
; CHECK: vec.epilog.middle.block:
358
364
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
359
365
; CHECK: vec.epilog.scalar.ph:
0 commit comments