Skip to content

Commit 0baacd1

Browse files
authored
[flang][OpenMP] Support MLIR lowering of linear clause for omp.wsloop (#139385)
This patch adds support for MLIR lowering of linear clause on omp.wsloop (except for linear modifiers).
1 parent 67f4d84 commit 0baacd1

File tree

10 files changed

+385
-19
lines changed

10 files changed

+385
-19
lines changed

flang/lib/Lower/OpenMP/ClauseProcessor.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,40 @@ bool ClauseProcessor::processIsDevicePtr(
10801080
});
10811081
}
10821082

1083+
/// Collects the operands of every `linear` clause into \p result.
/// For each clause, the address of each listed object's symbol is pushed to
/// `result.linearVars`, and one step value per object is appended to
/// `result.linearStepVars`. The return value is whatever
/// findRepeatableClause returns (presumably whether any `linear` clause was
/// found -- confirm against its definition).
bool ClauseProcessor::processLinear(mlir::omp::LinearClauseOps &result) const {
1084+
// Statement context handed to genExprValue when a step expression is lowered.
lower::StatementContext stmtCtx;
1085+
return findRepeatableClause<
1086+
omp::clause::Linear>([&](const omp::clause::Linear &clause,
1087+
const parser::CharBlock &) {
1088+
auto &objects = std::get<omp::ObjectList>(clause.t);
1089+
// Record the address of every variable named in this clause.
for (const omp::Object &object : objects) {
1090+
semantics::Symbol *sym = object.sym();
1091+
const mlir::Value variable = converter.getSymbolAddress(*sym);
1092+
result.linearVars.push_back(variable);
1093+
}
1094+
// Steps are only produced when the clause lists at least one object; each
// branch that appends does so objects.size() times, i.e. once per variable.
if (objects.size()) {
1095+
if (auto &mod =
1096+
std::get<std::optional<omp::clause::Linear::StepComplexModifier>>(
1097+
clause.t)) {
1098+
// An explicit step expression is evaluated once and the resulting value
// is shared by all objects of this clause.
mlir::Value operand =
1099+
fir::getBase(converter.genExprValue(toEvExpr(*mod), stmtCtx));
1100+
result.linearStepVars.append(objects.size(), operand);
1101+
} else if (std::get<std::optional<omp::clause::Linear::LinearModifier>>(
1102+
clause.t)) {
1103+
// Linear modifiers are not lowered yet; this is reported through TODO.
mlir::Location currentLocation = converter.getCurrentLocation();
1104+
TODO(currentLocation, "Linear modifiers not yet implemented");
1105+
} else {
1106+
// If nothing is present, add the default step of 1.
1107+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1108+
mlir::Location currentLocation = converter.getCurrentLocation();
1109+
// The default step is materialised as an i32 constant.
mlir::Value operand = firOpBuilder.createIntegerConstant(
1110+
currentLocation, firOpBuilder.getI32Type(), 1);
1111+
result.linearStepVars.append(objects.size(), operand);
1112+
}
1113+
}
1114+
});
1115+
}
1116+
10831117
bool ClauseProcessor::processLink(
10841118
llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const {
10851119
return findRepeatableClause<omp::clause::Link>(

flang/lib/Lower/OpenMP/ClauseProcessor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ class ClauseProcessor {
128128
bool processIsDevicePtr(
129129
mlir::omp::IsDevicePtrClauseOps &result,
130130
llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const;
131+
bool processLinear(mlir::omp::LinearClauseOps &result) const;
131132
bool
132133
processLink(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
133134

flang/lib/Lower/OpenMP/DataSharingProcessor.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,14 +213,15 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
213213
// so, we won't need to explicitly handle block objects (or forget to do
214214
// so).
215215
for (auto *sym : explicitlyPrivatizedSymbols)
216-
allPrivatizedSymbols.insert(sym);
216+
if (!sym->test(Fortran::semantics::Symbol::Flag::OmpLinear))
217+
allPrivatizedSymbols.insert(sym);
217218
}
218219

219220
bool DataSharingProcessor::needBarrier() {
220221
// Emit implicit barrier to synchronize threads and avoid data races on
221222
// initialization of firstprivate variables and post-update of lastprivate
222223
// variables.
223-
// Emit implicit barrier for linear clause. Maybe on somewhere else.
224+
// Emit implicit barrier for linear clause in the OpenMPIRBuilder.
224225
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
225226
if (sym->test(semantics::Symbol::Flag::OmpLastPrivate) &&
226227
(sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1963,13 +1963,13 @@ static void genWsloopClauses(
19631963
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
19641964
ClauseProcessor cp(converter, semaCtx, clauses);
19651965
cp.processNowait(clauseOps);
1966+
cp.processLinear(clauseOps);
19661967
cp.processOrder(clauseOps);
19671968
cp.processOrdered(clauseOps);
19681969
cp.processReduction(loc, clauseOps, reductionSyms);
19691970
cp.processSchedule(stmtCtx, clauseOps);
19701971

1971-
cp.processTODO<clause::Allocate, clause::Linear>(
1972-
loc, llvm::omp::Directive::OMPD_do);
1972+
cp.processTODO<clause::Allocate>(loc, llvm::omp::Directive::OMPD_do);
19731973
}
19741974

19751975
//===----------------------------------------------------------------------===//
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
! This test checks lowering of OpenMP DO Directive (Worksharing)
2+
! with linear clause
3+
4+
! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
5+
6+
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
7+
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
8+
!CHECK: %[[const:.*]] = arith.constant 1 : i32
9+
! No step is written on the linear clause here, so the FileCheck directives
! expect the i32 constant 1 to be used as the linear step of omp.wsloop.
subroutine simple_linear
10+
implicit none
11+
integer :: x, y, i
12+
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
13+
!$omp do linear(x)
14+
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
15+
!CHECK: %[[const:.*]] = arith.constant 2 : i32
16+
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
17+
do i = 1, 10
18+
y = x + 2
19+
end do
20+
!$omp end do
21+
end subroutine
22+
23+
24+
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
25+
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
26+
! The linear clause carries the literal step 4, so the FileCheck directives
! expect the i32 constant 4 to be used as the linear step of omp.wsloop.
subroutine linear_step
27+
implicit none
28+
integer :: x, y, i
29+
!CHECK: %[[const:.*]] = arith.constant 4 : i32
30+
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
31+
!$omp do linear(x:4)
32+
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
33+
!CHECK: %[[const:.*]] = arith.constant 2 : i32
34+
!CHECK: %[[RESULT:.*]] = arith.addi %[[LOAD]], %[[const]] : i32
35+
do i = 1, 10
36+
y = x + 2
37+
end do
38+
!$omp end do
39+
end subroutine
40+
41+
!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
42+
!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
43+
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
44+
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
45+
! The linear step is the expression a+4, so the FileCheck directives expect a
! load of a added to the constant 4, with that sum used as the linear step of
! omp.wsloop.
subroutine linear_expr
46+
implicit none
47+
integer :: x, y, i, a
48+
!CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
49+
!CHECK: %[[const:.*]] = arith.constant 4 : i32
50+
!CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
51+
!CHECK: omp.wsloop linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
52+
!$omp do linear(x:a+4)
53+
do i = 1, 10
54+
y = x + 2
55+
end do
56+
!$omp end do
57+
end subroutine

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3580,6 +3580,9 @@ class CanonicalLoopInfo {
35803580
BasicBlock *Latch = nullptr;
35813581
BasicBlock *Exit = nullptr;
35823582

3583+
// Holds the llvm::Value for the `lastiter` of the canonical loop.
3584+
Value *LastIter = nullptr;
3585+
35833586
/// Add the control blocks of this loop to \p BBs.
35843587
///
35853588
/// This does not include any block from the body, including the one returned
@@ -3612,6 +3615,18 @@ class CanonicalLoopInfo {
36123615
void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
36133616

36143617
public:
3618+
/// Sets the last iteration variable for this loop.
3619+
/// Stores \p IterVar so that getLastIter() can hand it back later.
void setLastIter(Value *IterVar) { LastIter = IterVar; }
3620+
3621+
/// Returns the last iteration variable for this loop.
3622+
/// Certain use-cases (like translation of linear clause) may access
3623+
/// this variable even after a loop transformation. Hence, do not guard
3624+
/// this getter function by `isValid`. It is the responsibility of the
3625+
/// caller to ensure this functionality is not invoked by a non-outlined
3626+
/// CanonicalLoopInfo object (in which case, `setLastIter` will never be
3627+
/// invoked and `LastIter` will be by default `nullptr`).
3628+
/// Read-only accessor; yields nullptr if setLastIter was never called.
Value *getLastIter() const { return LastIter; }
3629+
36153630
/// Returns whether this object currently represents the IR of a loop. If
36163631
/// returning false, it may have been consumed by a loop transformation or not
36173632
/// been initialized. Do not use in this case;

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4254,6 +4254,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
42544254
Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
42554255
Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
42564256
Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
4257+
CLI->setLastIter(PLastIter);
42574258

42584259
// At the end of the preheader, prepare for calling the "init" function by
42594260
// storing the current loop bounds into the allocated space. A canonical loop
@@ -4361,6 +4362,7 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(DebugLoc DL,
43614362
Value *PUpperBound =
43624363
Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
43634364
Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");
4365+
CLI->setLastIter(PLastIter);
43644366

43654367
// Set up the source location value for the OpenMP runtime.
43664368
Builder.restoreIP(CLI->getPreheaderIP());
@@ -4844,6 +4846,7 @@ OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
48444846
Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
48454847
Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
48464848
Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
4849+
CLI->setLastIter(PLastIter);
48474850

48484851
// At the end of the preheader, prepare for calling the "init" function by
48494852
// storing the current loop bounds into the allocated space. A canonical loop

0 commit comments

Comments
 (0)