[LV][VP][NFC]Add tests for safe store/load forwarding/dependence distance. #100635
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Patch is 22.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100635.diff

1 Files Affected:
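For orientation before the generated checks: every test in the diff reduces to the same scalar kernel, a load of p[iv] followed by a store to p[iv + distance], with the distance varied per test (200, 100, 8192, 1024). Vectorization stays legal as long as the chosen VF (and EVL) never exceeds that distance; when the distance is at least the trip count, stored values can never be read back at all. A minimal sketch of the shared kernel (hypothetical name, not part of the diff):

; Sketch only: the scalar kernel each test below is built from. With a
; distance of 200 and a trip count of 200, no iteration reads a value
; stored by an earlier iteration, so any VF is safe.
define void @kernel_sketch(ptr %p) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %src, align 8
  %off = add i64 %iv, 200            ; dependence distance
  %dst = getelementptr i64, ptr %p, i64 %off
  store i64 %v, ptr %dst, align 8
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}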
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
new file mode 100644
index 0000000000000..ca9207be670ed
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
@@ -0,0 +1,418 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL
+
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP
+
+; Dependence distance between read and write is greater than the trip
+; count of the loop. Thus, values written are never read for any
+; valid vectorization of the loop.
+define void @test(ptr %p) {
+; IF-EVL-LABEL: @test(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP2]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
+; IF-EVL: vector.body:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP9]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[TMP7]], 200
+; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
+; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL: middle.block:
+; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; IF-EVL: scalar.ph:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IF-EVL-NEXT: br label [[LOOP:%.*]]
+; IF-EVL: loop:
+; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
+; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 200
+; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
+; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; IF-EVL: exit:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: @test(
+; NO-VP-NEXT: entry:
+; NO-VP-NEXT: br label [[LOOP:%.*]]
+; NO-VP: loop:
+; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
+; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 200
+; NO-VP-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; NO-VP-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-VP-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
+; NO-VP-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; NO-VP: exit:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [0, %entry], [%iv.next, %loop]
+ %a1 = getelementptr i64, ptr %p, i64 %iv
+ %v = load i64, ptr %a1, align 32
+ %offset = add i64 %iv, 200
+ %a2 = getelementptr i64, ptr %p, i64 %offset
+ store i64 %v, ptr %a2, align 32
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ne i64 %iv, 199
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Dependence distance is less than the trip count, thus we must prove that
+; the chosen VF is guaranteed to be less than the dependence distance.
+define void @test_may_clobber(ptr %p) {
+; IF-EVL-LABEL: @test_may_clobber(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
+; IF-EVL: vector.body:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
+; IF-EVL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
+; IF-EVL-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 100
+; IF-EVL-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
+; IF-EVL-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
+; IF-EVL-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
+; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL: middle.block:
+; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; IF-EVL: scalar.ph:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IF-EVL-NEXT: br label [[LOOP:%.*]]
+; IF-EVL: loop:
+; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
+; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100
+; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
+; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL: exit:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: @test_may_clobber(
+; NO-VP-NEXT: entry:
+; NO-VP-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; NO-VP: vector.ph:
+; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
+; NO-VP: vector.body:
+; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
+; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
+; NO-VP-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 100
+; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
+; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
+; NO-VP-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; NO-VP-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
+; NO-VP-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-VP: middle.block:
+; NO-VP-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; NO-VP: scalar.ph:
+; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; NO-VP-NEXT: br label [[LOOP:%.*]]
+; NO-VP: loop:
+; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
+; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100
+; NO-VP-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; NO-VP-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-VP-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
+; NO-VP-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; NO-VP: exit:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [0, %entry], [%iv.next, %loop]
+ %a1 = getelementptr i64, ptr %p, i64 %iv
+ %v = load i64, ptr %a1, align 32
+ %offset = add i64 %iv, 100
+ %a2 = getelementptr i64, ptr %p, i64 %offset
+ store i64 %v, ptr %a2, align 32
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ne i64 %iv, 199
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Trivially no overlap due to the maximum possible value of VLEN and LMUL
+define void @trivial_due_max_vscale(ptr %p) {
+; IF-EVL-LABEL: @trivial_due_max_vscale(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 200, [[TMP2]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
+; IF-EVL: vector.body:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 200, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP7]]
+; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP9]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[TMP7]], 8192
+; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
+; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP_LOAD]], ptr align 32 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IF-EVL: middle.block:
+; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; IF-EVL: scalar.ph:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IF-EVL-NEXT: br label [[LOOP:%.*]]
+; IF-EVL: loop:
+; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
+; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 8192
+; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
+; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL: exit:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: @trivial_due_max_vscale(
+; NO-VP-NEXT: entry:
+; NO-VP-NEXT: br label [[LOOP:%.*]]
+; NO-VP: loop:
+; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
+; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 8192
+; NO-VP-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; NO-VP-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-VP-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199
+; NO-VP-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; NO-VP: exit:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [0, %entry], [%iv.next, %loop]
+ %a1 = getelementptr i64, ptr %p, i64 %iv
+ %v = load i64, ptr %a1, align 32
+ %offset = add i64 %iv, 8192
+ %a2 = getelementptr i64, ptr %p, i64 %offset
+ store i64 %v, ptr %a2, align 32
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ne i64 %iv, 199
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Dependence distance could be violated via LMUL>=2 or interleaving
+define void @no_high_lmul_or_interleave(ptr %p) {
+; IF-EVL-LABEL: @no_high_lmul_or_interleave(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
+; IF-EVL: vector.body:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 3001, i64 3001, i64 3001, i64 3001>
+; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
+; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
+; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[TMP3]], i32 32, <4 x i1> [[TMP1]], <4 x i64> poison)
+; IF-EVL-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 1024
+; IF-EVL-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP4]]
+; IF-EVL-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
+; IF-EVL-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_MASKED_LOAD]], ptr [[TMP6]], i32 32, <4 x i1> [[TMP1]])
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 3004
+; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; IF-EVL: middle.block:
+; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; IF-EVL: scalar.ph:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IF-EVL-NEXT: br label [[LOOP:%.*]]
+; IF-EVL: loop:
+; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
+; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 1024
+; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 3001
+; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; IF-EVL: exit:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: @no_high_lmul_or_interleave(
+; NO-VP-NEXT: entry:
+; NO-VP-NEXT: br label [[LOOP:%.*]]
+; NO-VP: loop:
+; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[IV]]
+; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32
+; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 1024
+; NO-VP-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]]
+; NO-VP-NEXT: store i64 [[V]], ptr [[A2]], align 32
+; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-VP-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 3001
+; NO-VP-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; NO-VP: exit:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [0, %entry], [%iv.next, %loop]
+ %a1 = getelementptr i64, ptr %p, i64 %iv
+ %v = load i64, ptr %a1, align 32
+ %offset = add i64 %iv, 1024
+ %a2 = getelementptr i64, ptr %p, i64 %offset
+ store i64 %v, ptr %a2, align 32
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ne i64 %iv, 3001
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @non-power-2-storeloadforward(ptr %A) {
+; IF-EVL-LABEL: @non-power-2-storeloadforward(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
+; IF-EVL: for.body:
+; IF-EVL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 16, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP0:%.*]] = add nsw i64 [[INDVARS_IV]], -3
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; IF-EVL-NEXT: [[TMP2:%.*]] = add nsw i64 [[INDVARS_IV]], 4
+; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]]
+; IF-EVL-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; IF-EVL-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
+; IF-EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; IF-EVL-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX5]], align 4
+;...
[truncated]
LGTM, with suggestions inline
loop:
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
smaller alignment sufficient?
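One possible reading of the suggestion (a sketch, not the committed test): the accesses are plain i64 loads/stores, so natural 8-byte alignment would be enough, e.g.

  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 8          ; natural i64 alignment instead of 32
  %offset = add i64 %iv, 200
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 8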
  %iv = phi i64 [0, %entry], [%iv.next, %loop]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 100
Distance is quite far for the tests, might be good to have a closer one as well?
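For illustration, a closer-distance variant in the spirit of this suggestion could look like the following (a sketch with a hypothetical name, not part of the patch). With a dependence distance of only 8 elements, the vectorizer must keep VF (and EVL) at or below 8 for the transform to stay legal:

define void @close_dep_distance_sketch(ptr %p) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %a1 = getelementptr i64, ptr %p, i64 %iv
  %v = load i64, ptr %a1, align 32
  %offset = add i64 %iv, 8            ; distance close to realistic VFs
  %a2 = getelementptr i64, ptr %p, i64 %offset
  store i64 %v, ptr %a2, align 32
  %iv.next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv, 199
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}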
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
Suggested change:
-  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
+  %iv = phi i64 [ 16, %entry ], [ %iv.next, %for.body ]
Created using spr 1.3.5
No description provided.