Skip to content

Commit bc194a5

Browse files
[mlir][SCF] Do not peel loops inside partial iterations
Do not apply loop peeling to loops that are contained in the partial iteration of an already peeled loop. This avoids code explosion when dealing with large loop nests. The behavior can be controlled with a new pass option, `skip-partial`. Differential Revision: https://reviews.llvm.org/D108542
1 parent 2556f58 commit bc194a5

File tree

4 files changed

+88
-10
lines changed

4 files changed

+88
-10
lines changed

mlir/include/mlir/Dialect/SCF/Passes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ def SCFForLoopPeeling
2121
: FunctionPass<"for-loop-peeling"> {
2222
let summary = "Peel `for` loops at their upper bounds.";
2323
let constructor = "mlir::createForLoopPeelingPass()";
24+
let options = [
25+
Option<"skipPartial", "skip-partial", "bool",
26+
/*default=*/"true",
27+
"Do not peel loops inside of the last, partial iteration of another "
28+
"already peeled loop.">
29+
];
2430
let dependentDialects = ["AffineDialect"];
2531
}
2632

mlir/include/mlir/Dialect/SCF/Transforms.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ void naivelyFuseParallelOps(Region &region);
4141

4242
/// Rewrite a for loop with bounds/step that potentially do not divide evenly
4343
/// into a for loop where the step divides the iteration space evenly, followed
44-
/// by an scf.if for the last (partial) iteration (if any). This transformation
45-
/// is called "loop peeling".
44+
/// by an scf.if for the last (partial) iteration (if any; returned via `ifOp`).
45+
/// This transformation is called "loop peeling".
4646
///
4747
/// This transformation is beneficial for a wide range of transformations such
4848
/// as vectorization or loop tiling: It enables additional canonicalizations
@@ -81,7 +81,8 @@ void naivelyFuseParallelOps(Region &region);
8181
/// Note: This function rewrites the given scf.for loop in-place and creates a
8282
/// new scf.if operation for the last iteration. It replaces all uses of the
8383
/// unpeeled loop with the results of the newly generated scf.if.
84-
LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp);
84+
LogicalResult peelAndCanonicalizeForLoop(RewriterBase &rewriter, ForOp forOp,
85+
scf::IfOp &ifOp);
8586

8687
/// Tile a parallel loop of the form
8788
/// scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)

mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -362,9 +362,9 @@ static LogicalResult rewritePeeledAffineOp(RewriterBase &rewriter,
362362
}
363363

364364
LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
365-
ForOp forOp) {
365+
ForOp forOp,
366+
scf::IfOp &ifOp) {
366367
Value ub = forOp.upperBound();
367-
scf::IfOp ifOp;
368368
Value splitBound;
369369
if (failed(peelForLoop(rewriter, forOp, ifOp, splitBound)))
370370
return failure();
@@ -383,23 +383,45 @@ LogicalResult mlir::scf::peelAndCanonicalizeForLoop(RewriterBase &rewriter,
383383
}
384384

385385
static constexpr char kPeeledLoopLabel[] = "__peeled_loop__";
386+
static constexpr char kPartialIterationLabel[] = "__partial_iteration__";
386387

387388
namespace {
388389
struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> {
389-
using OpRewritePattern<ForOp>::OpRewritePattern;
390+
ForLoopPeelingPattern(MLIRContext *ctx, bool skipPartial)
391+
: OpRewritePattern<ForOp>(ctx), skipPartial(skipPartial) {}
390392

391393
LogicalResult matchAndRewrite(ForOp forOp,
392394
PatternRewriter &rewriter) const override {
395+
// Do not peel already peeled loops.
393396
if (forOp->hasAttr(kPeeledLoopLabel))
394397
return failure();
395-
if (failed(peelAndCanonicalizeForLoop(rewriter, forOp)))
398+
if (skipPartial) {
399+
// No peeling of loops inside the partial iteration (scf.if) of another
400+
// peeled loop.
401+
Operation *op = forOp.getOperation();
402+
while ((op = op->getParentOfType<scf::IfOp>())) {
403+
if (op->hasAttr(kPartialIterationLabel))
404+
return failure();
405+
}
406+
}
407+
// Apply loop peeling.
408+
scf::IfOp ifOp;
409+
if (failed(peelAndCanonicalizeForLoop(rewriter, forOp, ifOp)))
396410
return failure();
397411
// Apply label, so that the same loop is not rewritten a second time.
398412
rewriter.updateRootInPlace(forOp, [&]() {
399413
forOp->setAttr(kPeeledLoopLabel, rewriter.getUnitAttr());
400414
});
415+
ifOp->setAttr(kPartialIterationLabel, rewriter.getUnitAttr());
401416
return success();
402417
}
418+
419+
/// If set to true, loops inside partial iterations of another peeled loop
420+
/// are not peeled. This reduces the size of the generated code. Partial
421+
/// iterations are not usually performance critical.
422+
/// Note: Takes into account the entire chain of parent operations, not just
423+
/// the direct parent.
424+
bool skipPartial;
403425
};
404426
} // namespace
405427

@@ -424,11 +446,14 @@ struct ForLoopPeeling : public SCFForLoopPeelingBase<ForLoopPeeling> {
424446
FuncOp funcOp = getFunction();
425447
MLIRContext *ctx = funcOp.getContext();
426448
RewritePatternSet patterns(ctx);
427-
patterns.add<ForLoopPeelingPattern>(ctx);
449+
patterns.add<ForLoopPeelingPattern>(ctx, skipPartial);
428450
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
429451

430-
// Drop the marker.
431-
funcOp.walk([](ForOp op) { op->removeAttr(kPeeledLoopLabel); });
452+
// Drop the markers.
453+
funcOp.walk([](Operation *op) {
454+
op->removeAttr(kPeeledLoopLabel);
455+
op->removeAttr(kPartialIterationLabel);
456+
});
432457
}
433458
};
434459
} // namespace

mlir/test/Dialect/SCF/for-loop-peeling.mlir

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// RUN: mlir-opt %s -for-loop-peeling -canonicalize -split-input-file | FileCheck %s
2+
// RUN: mlir-opt %s -for-loop-peeling=skip-partial=false -canonicalize -split-input-file | FileCheck %s -check-prefix=CHECK-NO-SKIP
23

34
// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (s1 - s0) mod s2)>
45
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> (-(s0 - (s0 - s1) mod s2) + s0)>
@@ -223,3 +224,48 @@ func @test_affine_min_rewrite(%lb : index, %ub: index,
223224
}
224225
return
225226
}
227+
228+
// -----
229+
230+
// CHECK: func @nested_loops
231+
// CHECK: scf.for {{.*}} {
232+
// CHECK: scf.for {{.*}} {
233+
// CHECK: }
234+
// CHECK: scf.if {{.*}} {
235+
// CHECK: }
236+
// CHECK: }
237+
// CHECK: scf.if {{.*}} {
238+
// CHECK: scf.for {{.*}} {
239+
// CHECK: }
240+
// CHECK-NOT: scf.if
241+
// CHECK: }
242+
243+
// CHECK-NO-SKIP: func @nested_loops
244+
// CHECK-NO-SKIP: scf.for {{.*}} {
245+
// CHECK-NO-SKIP: scf.for {{.*}} {
246+
// CHECK-NO-SKIP: }
247+
// CHECK-NO-SKIP: scf.if {{.*}} {
248+
// CHECK-NO-SKIP: }
249+
// CHECK-NO-SKIP: }
250+
// CHECK-NO-SKIP: scf.if {{.*}} {
251+
// CHECK-NO-SKIP: scf.for {{.*}} {
252+
// CHECK-NO-SKIP: }
253+
// CHECK-NO-SKIP: scf.if {{.*}} {
254+
// CHECK-NO-SKIP: }
255+
// CHECK-NO-SKIP: }
256+
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
257+
func @nested_loops(%lb0: index, %lb1 : index, %ub0: index, %ub1: index,
258+
%step: index) -> i32 {
259+
%c0 = constant 0 : i32
260+
%r0 = scf.for %iv0 = %lb0 to %ub0 step %step iter_args(%arg0 = %c0) -> i32 {
261+
%r1 = scf.for %iv1 = %lb1 to %ub1 step %step iter_args(%arg1 = %arg0) -> i32 {
262+
%s = affine.min #map(%ub1, %iv1)[%step]
263+
%casted = index_cast %s : index to i32
264+
%0 = addi %arg1, %casted : i32
265+
scf.yield %0 : i32
266+
}
267+
%1 = addi %arg0, %r1 : i32
268+
scf.yield %1 : i32
269+
}
270+
return %r0 : i32
271+
}

0 commit comments

Comments (0)