[flang][OpenMP] Fix the types of worksharing-loop variables

PeixinQiao · PeixinQiao · commit 870f4421acc6 · 2022-05-20T15:16:03.000+08:00
The types of the lower bound, upper bound, and step are converted to the type of the loop variable if necessary. The OpenMP runtime requires 32-bit or 64-bit loop variables, so a loop iteration variable narrower than 32 bits is widened to 32 bits, and one wider than 64 bits is narrowed to 64 bits with a warning. This patch is part of upstreaming code from the fir-dev branch of https://github.com/flang-compiler/f18-llvm-project. (#1256) Co-authored-by: kiranchandramohan <kiranchandramohan@gmail.com> Reviewed By: kiranchandramohan, shraiysh Differential Revision: https://reviews.llvm.org/D125740
diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
@@ -120,6 +120,24 @@ static void genObjectList(const Fortran::parser::OmpObjectList &objectList,
   }
 }
 
+static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter,
+                                 std::size_t loopVarTypeSize) {
+  // Widen/narrow to the 32- or 64-bit loop variable the OpenMP runtime needs.
+  loopVarTypeSize = loopVarTypeSize * 8; // Size in bytes -> width in bits.
+  if (loopVarTypeSize < 32) {
+    loopVarTypeSize = 32; // Widen sub-32-bit loop variables.
+  } else if (loopVarTypeSize > 64) {
+    loopVarTypeSize = 64; // Narrow; the user is warned below.
+    mlir::emitWarning(converter.getCurrentLocation(),
+                      "OpenMP loop iteration variable cannot have more than 64 "
+                      "bits size and will be narrowed into 64 bits.");
+  }
+  assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) &&
+         "OpenMP loop iteration variable size must be transformed into 32-bit "
+         "or 64-bit"); // Fires for widths strictly between 32 and 64.
+  return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize);
+}
+
 /// Create the body (block) for an OpenMP Operation.
 ///
 /// \param [in]    op - the operation the body belongs to.
@@ -143,15 +161,19 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter,
   // e.g. For loops the arguments are the induction variable. And all further
   // uses of the induction variable should use this mlir value.
   if (args.size()) {
+    std::size_t loopVarTypeSize = 0;
+    for (const Fortran::semantics::Symbol *arg : args)
+      loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
+    mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
     SmallVector<Type> tiv;
     SmallVector<Location> locs;
-    int argIndex = 0;
-    for (auto &arg : args) {
-      tiv.push_back(converter.genType(*arg));
+    for (int i = 0; i < (int)args.size(); i++) {
+      tiv.push_back(loopVarType);
       locs.push_back(loc);
     }
     firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs);
-    for (auto &arg : args) {
+    int argIndex = 0;
+    for (const Fortran::semantics::Symbol *arg : args) {
       fir::ExtendedValue exval = op.getRegion().front().getArgument(argIndex);
       converter.bindSymbol(*arg, exval);
       argIndex++;
@@ -490,11 +512,12 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
     TODO(converter.getCurrentLocation(), "Construct enclosing do loop");
   }
 
-  int64_t collapseValue = Fortran::lower::getCollapseValue(wsLoopOpClauseList);
-
   // Collect the loops to collapse.
   auto *doConstructEval = &eval.getFirstNestedEvaluation();
 
+  std::int64_t collapseValue =
+      Fortran::lower::getCollapseValue(wsLoopOpClauseList);
+  std::size_t loopVarTypeSize = 0;
   SmallVector<const Fortran::semantics::Symbol *> iv;
   do {
     auto *doLoop = &doConstructEval->getFirstNestedEvaluation();
@@ -518,12 +541,26 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
           currentLocation, firOpBuilder.getIntegerType(32), 1));
     }
     iv.push_back(bounds->name.thing.symbol);
+    loopVarTypeSize = std::max(loopVarTypeSize,
+                               bounds->name.thing.symbol->GetUltimate().size());
 
     collapseValue--;
     doConstructEval =
         &*std::next(doConstructEval->getNestedEvaluations().begin());
   } while (collapseValue > 0);
 
+  // The types of lower bound, upper bound, and step are converted into the
+  // type of the loop variable if necessary.
+  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+  for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) {
+    lowerBound[it] = firOpBuilder.createConvert(currentLocation, loopVarType,
+                                                lowerBound[it]);
+    upperBound[it] = firOpBuilder.createConvert(currentLocation, loopVarType,
+                                                upperBound[it]);
+    step[it] =
+        firOpBuilder.createConvert(currentLocation, loopVarType, step[it]);
+  }
+
   // FIXME: Add support for following clauses:
   // 1. linear
   // 2. order
diff --git a/flang/test/Lower/OpenMP/omp-wsloop-variable.f90 b/flang/test/Lower/OpenMP/omp-wsloop-variable.f90
@@ -0,0 +1,126 @@
+! This test checks lowering of OpenMP DO Directive(Worksharing) for different
+! types of loop iteration variable, lower bound, upper bound, and step.
+
+!REQUIRES: shell
+!RUN: bbc -fopenmp -emit-fir %s -o - 2>&1 | FileCheck %s
+
+!CHECK:  OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed into 64 bits.
+
+program wsloop_variable
+  integer(kind=1) :: i1_lb, i1_ub
+  integer(kind=2) :: i2, i2_ub, i2_s
+  integer(kind=4) :: i4_s
+  integer(kind=8) :: i8, i8_s
+  integer(kind=16) :: i16, i16_lb
+  real :: x
+
+!CHECK:  [[TMP0:%.*]] = arith.constant 1 : i32
+!CHECK:  [[TMP1:%.*]] = arith.constant 100 : i32
+!CHECK:  [[TMP2:%.*]] = fir.convert [[TMP0]] : (i32) -> i64
+!CHECK:  [[TMP3:%.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:  [[TMP4:%.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:  [[TMP5:%.*]] = fir.convert %{{.*}} : (i128) -> i64
+!CHECK:  [[TMP6:%.*]] = fir.convert [[TMP1]] : (i32) -> i64
+!CHECK:  [[TMP7:%.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:  omp.wsloop collapse(2) for ([[TMP8:%.*]], [[TMP9:%.*]]) : i64 = ([[TMP2]], [[TMP5]]) to ([[TMP3]], [[TMP6]]) inclusive step ([[TMP4]], [[TMP7]]) {
+!CHECK:    [[TMP10:%.*]] = arith.addi [[TMP8]], [[TMP9]] : i64
+!CHECK:    [[TMP11:%.*]] = fir.convert [[TMP10]] : (i64) -> f32
+!CHECK:    fir.store [[TMP11]] to %{{.*}} : !fir.ref<f32>
+!CHECK:    omp.yield
+!CHECK:  }
+
+  !$omp do collapse(2)
+  do i2 = 1, i1_ub, i2_s  ! outer collapsed loop; 16-bit loop variable
+    do i8 = i16_lb, 100, i4_s  ! inner collapsed loop; 64-bit variable makes the wsloop i64
+      x = i2 + i8
+    end do
+  end do
+  !$omp end do
+
+!CHECK:  [[TMP12:%.*]] = arith.constant 1 : i32
+!CHECK:  [[TMP13:%.*]] = fir.convert %{{.*}} : (i8) -> i32
+!CHECK:  [[TMP14:%.*]] = fir.convert %{{.*}} : (i64) -> i32
+!CHECK:  omp.wsloop for ([[TMP15:%.*]]) : i32 = ([[TMP12]]) to ([[TMP13]]) inclusive step ([[TMP14]])  {
+!CHECK:    [[TMP16:%.*]] = fir.convert [[TMP15]] : (i32) -> f32
+!CHECK:    fir.store [[TMP16]] to %{{.*}} : !fir.ref<f32>
+!CHECK:    omp.yield
+!CHECK:  }
+
+  !$omp do
+  do i2 = 1, i1_ub, i8_s  ! 16-bit loop variable is widened to i32; the 64-bit step is converted to i32
+    x = i2
+  end do
+  !$omp end do
+
+!CHECK:  [[TMP17:%.*]] = fir.convert %{{.*}} : (i8) -> i64
+!CHECK:  [[TMP18:%.*]] = fir.convert %{{.*}} : (i16) -> i64
+!CHECK:  [[TMP19:%.*]] = fir.convert %{{.*}} : (i32) -> i64
+!CHECK:  omp.wsloop for ([[TMP20:%.*]]) : i64 = ([[TMP17]]) to ([[TMP18]]) inclusive step ([[TMP19]])  {
+!CHECK:    [[TMP21:%.*]] = fir.convert [[TMP20]] : (i64) -> f32
+!CHECK:    fir.store [[TMP21]] to %{{.*}} : !fir.ref<f32>
+!CHECK:    omp.yield
+!CHECK:  }
+
+  !$omp do
+  do i16 = i1_lb, i2_ub, i4_s  ! 128-bit loop variable is narrowed to i64 and triggers the warning
+    x = i16
+  end do
+  !$omp end do
+
+end program wsloop_variable
+
+!CHECK-LABEL: func.func @_QPwsloop_variable_sub() {
+!CHECK:         %[[VAL_0:.*]] = fir.alloca i128 {bindc_name = "i16_lb", uniq_name = "_QFwsloop_variable_subEi16_lb"}
+!CHECK:         %[[VAL_1:.*]] = fir.alloca i8 {bindc_name = "i1_ub", uniq_name = "_QFwsloop_variable_subEi1_ub"}
+!CHECK:         %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i2", uniq_name = "_QFwsloop_variable_subEi2"}
+!CHECK:         %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i2_s", uniq_name = "_QFwsloop_variable_subEi2_s"}
+!CHECK:         %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "i4_s", uniq_name = "_QFwsloop_variable_subEi4_s"}
+!CHECK:         %[[VAL_5:.*]] = fir.alloca i64 {bindc_name = "i8", uniq_name = "_QFwsloop_variable_subEi8"}
+!CHECK:         %[[VAL_6:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFwsloop_variable_subEx"}
+!CHECK:         %[[VAL_7:.*]] = arith.constant 1 : i32
+!CHECK:         %[[VAL_8:.*]] = fir.load %[[VAL_1]] : !fir.ref<i8>
+!CHECK:         %[[VAL_9:.*]] = fir.load %[[VAL_3]] : !fir.ref<i16>
+!CHECK:         %[[VAL_10:.*]] = fir.convert %[[VAL_8]] : (i8) -> i32
+!CHECK:         %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32
+!CHECK:         omp.wsloop   for  (%[[VAL_12:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_10]]) inclusive step (%[[VAL_11]]) {
+!CHECK:           %[[VAL_13:.*]] = fir.load %[[VAL_0]] : !fir.ref<i128>
+!CHECK:           %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i128) -> index
+!CHECK:           %[[VAL_15:.*]] = arith.constant 100 : i32
+!CHECK:           %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> index
+!CHECK:           %[[VAL_17:.*]] = fir.load %[[VAL_4]] : !fir.ref<i32>
+!CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index
+!CHECK:           %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_18]] -> index {
+!CHECK:             %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64
+!CHECK:             fir.store %[[VAL_21]] to %[[VAL_5]] : !fir.ref<i64>
+!CHECK:             %[[VAL_22:.*]] = fir.convert %[[VAL_12]] : (i32) -> i64
+!CHECK:             %[[VAL_23:.*]] = fir.load %[[VAL_5]] : !fir.ref<i64>
+!CHECK:             %[[VAL_24:.*]] = arith.addi %[[VAL_22]], %[[VAL_23]] : i64
+!CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i64) -> f32
+!CHECK:             fir.store %[[VAL_25]] to %[[VAL_6]] : !fir.ref<f32>
+!CHECK:             %[[VAL_26:.*]] = arith.addi %[[VAL_20]], %[[VAL_18]] : index
+!CHECK:             fir.result %[[VAL_26]] : index
+!CHECK:           }
+!CHECK:           %[[VAL_27:.*]] = fir.convert %[[VAL_28:.*]] : (index) -> i64
+!CHECK:           fir.store %[[VAL_27]] to %[[VAL_5]] : !fir.ref<i64>
+!CHECK:           omp.yield
+!CHECK:         }
+!CHECK:         return
+!CHECK:       }
+
+subroutine wsloop_variable_sub
+  integer(kind=1) :: i1_ub
+  integer(kind=2) :: i2, i2_s
+  integer(kind=4) :: i4_s
+  integer(kind=8) :: i8
+  integer(kind=16) :: i16_lb
+  real :: x
+
+  !$omp do
+  do i2 = 1, i1_ub, i2_s  ! no collapse: only this loop becomes the wsloop; 16-bit variable widened to i32
+    do i8 = i16_lb, 100, i4_s  ! stays an ordinary fir.do_loop nested inside the wsloop
+      x = i2 + i8
+    end do
+  end do
+  !$omp end do
+
+end