Skip to content

Commit 7775a48

Browse files
[LLVM][TTI][SME] Allow optional auto-vectorisation for streaming functions. (#101679)
The command-line option enable-scalable-autovec-in-streaming-mode is used to enable scalable vectors in streaming mode, but enableScalableVectorization does not perform the same check, which blocks auto-vectorisation of streaming functions.
1 parent fe85566 commit 7775a48

File tree

3 files changed

+62
-1
lines changed

3 files changed

+62
-1
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2342,6 +2342,11 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
23422342
return std::nullopt;
23432343
}
23442344

2345+
// Reports whether scalable vectorization is enabled for this subtarget.
// Returns true when ST->isSVEAvailable() holds, or when
// ST->isSVEorStreamingSVEAvailable() holds and
// EnableScalableAutovecInStreamingMode is set (presumably the flag behind
// -enable-scalable-autovec-in-streaming-mode; confirm against its definition).
bool AArch64TTIImpl::enableScalableVectorization() const {
2346+
return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
2347+
EnableScalableAutovecInStreamingMode);
2348+
}
2349+
23452350
TypeSize
23462351
AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
23472352
switch (K) {

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
381381
return ST->isSVEorStreamingSVEAvailable();
382382
}
383383

384-
bool enableScalableVectorization() const { return ST->isSVEAvailable(); }
384+
bool enableScalableVectorization() const;
385385

386386
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
387387
ElementCount VF) const;
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; Runs the loop vectorizer twice with its debug output enabled: once with
; default options and once with -enable-scalable-autovec-in-streaming-mode.
; Each run verifies the scalable-vectorization status reported per function.
; REQUIRES: asserts
2+
; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NOVEC
3+
; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -enable-scalable-autovec-in-streaming-mode < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,VEC
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
; Function carrying only attribute group #0 (no streaming attribute).
; The loop runs %iv from 0 to 1023 and stores zext(b[iv]) + c[iv] into a[iv],
; with i8 elements for %b and i32 elements for %a and %c.
; Both runs expect the "Scalable vectorization is available" debug message.
define void @normal_function(ptr %a, ptr %b, ptr %c) #0 {
8+
; CHECK: LV: Checking a loop in 'normal_function'
9+
; CHECK: LV: Scalable vectorization is available
10+
entry:
11+
br label %loop
12+
13+
loop:
14+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
15+
%arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
16+
%0 = load i32, ptr %arrayidx, align 4
17+
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
18+
%1 = load i8, ptr %arrayidx2, align 4
19+
%zext = zext i8 %1 to i32
20+
%add = add nsw i32 %zext, %0
21+
%arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
22+
store i32 %add, ptr %arrayidx5, align 4
23+
%iv.next = add nuw nsw i64 %iv, 1
24+
%exitcond.not = icmp eq i64 %iv.next, 1024
25+
br i1 %exitcond.not, label %exit, label %loop
26+
27+
exit:
28+
ret void
29+
}
30+
31+
; Same loop body as @normal_function, but the function additionally carries
; the "aarch64_pstate_sm_enabled" attribute.
; The run with -enable-scalable-autovec-in-streaming-mode expects scalable
; vectorization to be reported as available; the default run expects it to be
; reported as explicitly disabled.
define void @streaming_function(ptr %a, ptr %b, ptr %c) #0 "aarch64_pstate_sm_enabled" {
32+
; CHECK: LV: Checking a loop in 'streaming_function'
33+
; VEC: LV: Scalable vectorization is available
34+
; NOVEC: LV: Scalable vectorization is explicitly disabled
35+
entry:
36+
br label %loop
37+
38+
loop:
39+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
40+
%arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
41+
%0 = load i32, ptr %arrayidx, align 4
42+
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
43+
%1 = load i8, ptr %arrayidx2, align 4
44+
%zext = zext i8 %1 to i32
45+
%add = add nsw i32 %zext, %0
46+
%arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
47+
store i32 %add, ptr %arrayidx5, align 4
48+
%iv.next = add nuw nsw i64 %iv, 1
49+
%exitcond.not = icmp eq i64 %iv.next, 1024
50+
br i1 %exitcond.not, label %exit, label %loop
51+
52+
exit:
53+
ret void
54+
}
55+
56+
; Attribute group #0, referenced by both functions above: vscale_range(1, 16)
; with target features +sve and +sme.
attributes #0 = { vscale_range(1, 16) "target-features"="+sve,+sme" }

0 commit comments

Comments
 (0)