Skip to content

Commit f8f0a1e

Browse files
Ettore Tiotto and tungld authored
Use builder based interface to generate Krnl loops (llvm#1250)
* [ClipOp]: Use builder based interface to generate Krnl loops Signed-off-by: Ettore Tiotto <[email protected]> * [TileOp]: Use builder based interface to generate Krnl loops Signed-off-by: Ettore Tiotto <[email protected]> * [Transpose]: Use builder based interface to generate Krnl loops Signed-off-by: Ettore Tiotto <[email protected]> * [Transpose]: Remove #ifdef out code Signed-off-by: Ettore Tiotto <[email protected]> * [Split]: Use builder based interface to generate Krnl loops Signed-off-by: Ettore Tiotto <[email protected]> * Address code review comments Signed-off-by: Ettore Tiotto <[email protected]> Co-authored-by: Tung D. Le <[email protected]>
1 parent bbda7b2 commit f8f0a1e

File tree

10 files changed

+380
-299
lines changed

10 files changed

+380
-299
lines changed

src/Conversion/ONNXToKrnl/Math/Clip.cpp

+54-60
Original file line numberDiff line numberDiff line change
@@ -28,73 +28,67 @@ struct ONNXClipOpLowering : public ConversionPattern {
2828
LogicalResult matchAndRewrite(Operation *op, ArrayRef<Value> operands,
2929
ConversionPatternRewriter &rewriter) const final {
3030
Location loc = op->getLoc();
31-
Value input = operands[0];
32-
Value min = operands[1];
33-
Value max = operands[2];
31+
ONNXClipOp clipOp = cast<ONNXClipOp>(op);
32+
MemRefType memRefType = convertToMemRefType(*op->result_type_begin());
3433

35-
// Insert an allocation and deallocation for the result of this operation.
36-
auto memRefType = convertToMemRefType(*op->result_type_begin());
37-
38-
Value alloc;
39-
bool insertDealloc = checkInsertDealloc(op);
34+
ONNXClipOpAdaptor operandAdaptor(operands);
35+
ONNXClipOpShapeHelper shapeHelper(&clipOp, &rewriter,
36+
getDenseElementAttributeFromKrnlValue,
37+
loadDenseElementArrayValueAtIndex);
38+
auto shapeComputed = shapeHelper.computeShape(operandAdaptor);
39+
assert(succeeded(shapeComputed));
4040

41-
if (hasAllConstantDimensions(memRefType))
42-
alloc = insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc);
43-
else
44-
alloc = insertAllocAndDealloc(
45-
memRefType, loc, rewriter, insertDealloc, input);
41+
Value input = operandAdaptor.input();
42+
Value min = operandAdaptor.min();
43+
Value max = operandAdaptor.max();
4644

47-
SmallVector<Value, 4> loopIVs;
48-
// Only create krnl.iterate if one of the operands is not scalar tensor.
45+
// Insert an allocation and deallocation for the result of this operation.
46+
bool insertDealloc = checkInsertDealloc(op);
47+
Value alloc =
48+
(hasAllConstantDimensions(memRefType))
49+
? insertAllocAndDealloc(memRefType, loc, rewriter, insertDealloc)
50+
: insertAllocAndDealloc(
51+
memRefType, loc, rewriter, insertDealloc, input);
52+
53+
auto computeResult =
54+
[&](MultiDialectBuilder<KrnlBuilder, MathBuilder> &create,
55+
const ValueRange &indices) {
56+
Value loadedVal = create.krnl.load(input, indices);
57+
Value res = loadedVal;
58+
if (!min.getType().isa<NoneType>()) {
59+
Value minVal = create.krnl.load(min);
60+
Value lessThanMin = create.math.slt(res, minVal);
61+
res = create.math.select(lessThanMin, minVal, res);
62+
}
63+
if (!max.getType().isa<NoneType>()) {
64+
Value maxVal = create.krnl.load(max);
65+
Value lessThanMax = create.math.slt(res, maxVal);
66+
res = create.math.select(lessThanMax, res, maxVal);
67+
}
68+
create.krnl.store(res, alloc, indices);
69+
};
70+
71+
// Create a loop only if one of the operands is not a scalar tensor.
4972
if (!hasAllScalarValues(operands)) {
50-
// Create iterateOp & get block within iterate op.
51-
BuildKrnlLoop loops(rewriter, loc, memRefType.getRank());
52-
loops.createDefineAndIterateOp(input);
53-
Block *iterationBlock = loops.getIterateBlock();
54-
55-
// Insert instructions inside the KernelIterateOp body.
56-
rewriter.setInsertionPointToStart(iterationBlock);
57-
58-
// Handle the operation:
59-
for (auto arg : iterationBlock->getArguments())
60-
loopIVs.push_back(arg);
61-
}
62-
63-
// Load unary first operand.
64-
MultiDialectBuilder<KrnlBuilder, MathBuilder> create(rewriter, loc);
65-
Value loadedVal = create.krnl.load(input, loopIVs);
66-
Type inputType = loadedVal.getType();
67-
Value res = loadedVal;
68-
69-
if (inputType.isa<FloatType>()) {
70-
if (!min.getType().isa<NoneType>()) {
71-
Value minVal = create.krnl.load(min);
72-
Value lessThanMin = create.math.slt(res, minVal);
73-
res = create.math.select(lessThanMin, minVal, res);
74-
}
75-
if (!max.getType().isa<NoneType>()) {
76-
Value maxVal = create.krnl.load(max);
77-
Value lessThanMax = create.math.slt(res, maxVal);
78-
res = create.math.select(lessThanMax, res, maxVal);
79-
}
80-
} else if (inputType.isa<IntegerType>()) {
81-
if (!min.getType().isa<NoneType>()) {
82-
Value minVal = create.krnl.load(min);
83-
Value lessThanMin = create.math.slt(res, minVal);
84-
res = create.math.select(lessThanMin, minVal, res);
85-
}
86-
if (!max.getType().isa<NoneType>()) {
87-
Value maxVal = create.krnl.load(max);
88-
Value lessThanMax = create.math.slt(res, maxVal);
89-
res = create.math.select(lessThanMax, res, maxVal);
90-
}
73+
KrnlBuilder createKrnl(rewriter, loc);
74+
uint64_t numLoops = memRefType.getRank();
75+
ValueRange loopDef = createKrnl.defineLoops(numLoops);
76+
77+
SmallVector<IndexExpr, 4> lbs(numLoops, LiteralIndexExpr(0));
78+
SmallVector<IndexExpr, 4> ubs;
79+
for (uint64_t i = 0; i < numLoops; ++i)
80+
ubs.emplace_back(shapeHelper.dimsForOutput()[i]);
81+
82+
createKrnl.iterateIE(loopDef, loopDef, lbs, ubs,
83+
[&](KrnlBuilder &createKrnl, ValueRange indices) {
84+
MultiDialectBuilder<KrnlBuilder, MathBuilder> create(createKrnl);
85+
computeResult(create, indices);
86+
});
9187
} else {
92-
llvm_unreachable("unsupported element type");
88+
MultiDialectBuilder<KrnlBuilder, MathBuilder> create(rewriter, loc);
89+
computeResult(create, {});
9390
}
9491

95-
// Store result in the resulting array.
96-
create.krnl.store(res, alloc, loopIVs);
97-
9892
rewriter.replaceOp(op, alloc);
9993
return success();
10094
}

src/Conversion/ONNXToKrnl/Tensor/Split.cpp

+37-32
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ template <typename Adaptor, typename Op, typename ShapeHelper>
2121
LogicalResult ONNXSplitOpLoweringCommon(Operation *op, ArrayRef<Value> operands,
2222
ConversionPatternRewriter &rewriter) {
2323
// Gather info.
24-
auto loc = op->getLoc();
24+
Location loc = op->getLoc();
2525
Adaptor operandAdaptor(operands);
26-
Op splitOp = llvm::dyn_cast<Op>(op);
27-
auto rank = splitOp.input().getType().template cast<ShapedType>().getRank();
28-
auto outputNum = splitOp.getNumResults();
29-
auto axis = splitOp.axis();
26+
Op splitOp = cast<Op>(op);
27+
uint64_t rank =
28+
splitOp.input().getType().template cast<ShapedType>().getRank();
29+
unsigned outputNum = splitOp.getNumResults();
30+
unsigned axis = splitOp.axis();
3031

3132
// Get a shape helper.
3233
ShapeHelper shapeHelper(&splitOp, &rewriter,
@@ -36,7 +37,7 @@ LogicalResult ONNXSplitOpLoweringCommon(Operation *op, ArrayRef<Value> operands,
3637

3738
// Alloc and dealloc.
3839
SmallVector<Value, 4> allocs;
39-
for (unsigned int i = 0; i < outputNum; ++i) {
40+
for (unsigned i = 0; i < outputNum; ++i) {
4041
checkInsertDealloc(op, i);
4142
auto memRefType = convertToMemRefType(splitOp.outputs()[i].getType());
4243
Value alloc = insertAllocAndDeallocSimple(
@@ -45,40 +46,44 @@ LogicalResult ONNXSplitOpLoweringCommon(Operation *op, ArrayRef<Value> operands,
4546
}
4647

4748
// Creates loops, one for each output.
48-
for (unsigned int i = 0; i < outputNum; ++i) {
49+
for (unsigned i = 0; i < outputNum; ++i) {
4950
OpBuilder::InsertionGuard insertGuard(rewriter);
50-
// Create loop.
51-
BuildKrnlLoop outputLoops(rewriter, loc, rank);
52-
outputLoops.createDefineAndIterateOp(allocs[i]);
53-
rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());
5451

5552
// Scope for krnl ops
5653
IndexExprScope childScope(&rewriter, shapeHelper.scope);
54+
5755
KrnlBuilder createKrnl(rewriter, loc);
56+
ValueRange loopDef = createKrnl.defineLoops(rank);
57+
SmallVector<IndexExpr, 4> lbs(rank, LiteralIndexExpr(0));
58+
59+
MemRefBoundsIndexCapture allocsBounds(allocs[i]);
60+
SmallVector<IndexExpr, 4> ubs;
61+
allocsBounds.getDimList(ubs);
62+
63+
createKrnl.iterateIE(loopDef, loopDef, lbs, ubs,
64+
[&](KrnlBuilder &createKrnl, ValueRange indices) {
65+
SmallVector<IndexExpr, 4> readIndices;
66+
for (uint64_t r = 0; r < rank; ++r) {
67+
DimIndexExpr readIndex(indices[r]);
68+
// Compute read index for the split axis.
69+
if (r == axis)
70+
for (unsigned k = 0; k < i; ++k) {
71+
SymbolIndexExpr splitDim(shapeHelper.dimsForOutput(k)[r]);
72+
readIndex = readIndex + splitDim;
73+
}
5874

59-
// Indices for the read and write.
60-
SmallVector<IndexExpr, 4> readIndices;
61-
SmallVector<IndexExpr, 4> writeIndices;
62-
for (int r = 0; r < rank; ++r) {
63-
Value readVal = outputLoops.getInductionVar(r);
64-
// If not the split axis, same index for read and write
65-
IndexExpr readIndex = DimIndexExpr(readVal);
66-
DimIndexExpr writeIndex(readVal);
67-
// If the split axis, compute read index for the split axis.
68-
if (r == axis) {
69-
for (unsigned int k = 0; k < i; ++k) {
70-
IndexExpr splitDim = SymbolIndexExpr(shapeHelper.dimsForOutput(k)[r]);
71-
readIndex = readIndex + splitDim;
72-
}
73-
}
74-
readIndices.emplace_back(readIndex);
75-
writeIndices.emplace_back(writeIndex);
76-
}
77-
// Insert copy.
78-
Value loadData = createKrnl.loadIE(operandAdaptor.input(), readIndices);
79-
createKrnl.storeIE(loadData, allocs[i], writeIndices);
75+
readIndices.emplace_back(readIndex);
76+
}
77+
78+
// Insert copy.
79+
Value loadData =
80+
createKrnl.loadIE(operandAdaptor.input(), readIndices);
81+
createKrnl.store(loadData, allocs[i], indices);
82+
});
8083
}
84+
8185
rewriter.replaceOp(op, allocs);
86+
8287
return success();
8388
}
8489

src/Conversion/ONNXToKrnl/Tensor/Tile.cpp

+26-39
Original file line numberDiff line numberDiff line change
@@ -71,48 +71,35 @@ struct ONNXTileOpLowering : public ConversionPattern {
7171
(void)shapecomputed;
7272
assert(!failed(shapecomputed) && "expected to succeed");
7373

74-
MemRefType outputMemRefType = convertToMemRefType(*op->result_type_begin());
75-
auto outputMemRefShape = outputMemRefType.getShape();
76-
int64_t outputRank = outputMemRefShape.size();
74+
MemRefType memRefType = convertToMemRefType(*op->result_type_begin());
75+
llvm::ArrayRef<int64_t> memRefShape = memRefType.getShape();
76+
uint64_t outputRank = memRefShape.size();
7777

7878
Value input = operandAdaptor.input();
79-
8079
Value alloc = insertAllocAndDeallocSimple(
81-
rewriter, op, outputMemRefType, loc, shapeHelper.dimsForOutput(0));
82-
83-
// Define loops and iteration trip counts (equivalent to size of output)
84-
BuildKrnlLoop outputLoops(rewriter, loc, outputRank);
85-
outputLoops.createDefineOp();
86-
outputLoops.pushAllBounds(shapeHelper.dimsForOutput(0));
87-
outputLoops.createIterateOp();
88-
rewriter.setInsertionPointToStart(outputLoops.getIterateBlock());
89-
90-
SmallVector<Value, 4> loadIndices;
91-
// This implementation is to iterate the output tensor.
92-
// The store has simple affine subscript expression.
93-
// Alternative implementation is to iterate the input tensor and repeats.
94-
// The load of elements in input tensor can be reused explicitly.
95-
// But the subscript of store is not contigious, or even not affine.
96-
// Alternative implementation can be found at the end of this file.
97-
98-
for (int64_t i = 0; i < outputRank; i++) {
99-
// Scope is created for each dimension because they are independent
100-
IndexExprScope IEScope(&rewriter, loc);
101-
DimIndexExpr index(outputLoops.getInductionVar(i));
102-
MemRefBoundsIndexCapture inputBounds(input);
103-
DimIndexExpr dimSize(inputBounds.getDim(i));
104-
IndexExpr exprVal = index % dimSize;
105-
loadIndices.emplace_back(exprVal.getValue());
106-
}
107-
108-
MultiDialectBuilder<KrnlBuilder, MemRefBuilder, MathBuilder> create(
109-
rewriter, loc);
110-
Value loadVal = create.krnl.load(input, loadIndices);
111-
112-
SmallVector<Value, 4> storeIndices;
113-
for (int64_t i = 0; i < outputRank; ++i)
114-
storeIndices.emplace_back(outputLoops.getInductionVar(i));
115-
create.krnl.store(loadVal, alloc, storeIndices);
80+
rewriter, op, memRefType, loc, shapeHelper.dimsForOutput());
81+
82+
KrnlBuilder createKrnl(rewriter, loc);
83+
ValueRange loopDef = createKrnl.defineLoops(outputRank);
84+
SmallVector<IndexExpr, 4> lbs(outputRank, LiteralIndexExpr(0));
85+
86+
MemRefBoundsIndexCapture inputBounds(input);
87+
createKrnl.iterateIE(loopDef, loopDef, lbs, shapeHelper.dimsForOutput(),
88+
[&](KrnlBuilder &createKrnl, ValueRange indices) {
89+
// Compute the indices used by the input tensor load operation.
90+
// Note: An alternative implementation can be found at the end of this
91+
// file.
92+
SmallVector<Value, 4> loadIndices;
93+
for (uint64_t i = 0; i < outputRank; ++i) {
94+
DimIndexExpr index(indices[i]);
95+
DimIndexExpr dimSize(inputBounds.getDim(i));
96+
IndexExpr exprVal = index % dimSize;
97+
loadIndices.emplace_back(exprVal.getValue());
98+
}
99+
100+
Value loadVal = createKrnl.load(input, loadIndices);
101+
createKrnl.store(loadVal, alloc, indices);
102+
});
116103

117104
rewriter.replaceOp(op, alloc);
118105

src/Conversion/ONNXToKrnl/Tensor/Transpose.cpp

+23-27
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
3434

3535
// Basic information.
3636
auto memRefType = convertToMemRefType(*op->result_type_begin());
37-
int64_t rank = memRefType.getShape().size();
37+
uint64_t rank = memRefType.getShape().size();
3838

3939
// Get a shape helper.
4040
ONNXTransposeOpShapeHelper shapeHelper(&transposeOp, &rewriter,
@@ -46,32 +46,28 @@ struct ONNXTransposeOpLowering : public ConversionPattern {
4646

4747
// Insert an allocation and deallocation for the result of this operation.
4848
Value alloc = insertAllocAndDeallocSimple(
49-
rewriter, op, memRefType, loc, shapeHelper.dimsForOutput(0));
50-
51-
// Create loop.
52-
BuildKrnlLoop inputLoops(rewriter, loc, rank);
53-
inputLoops.createDefineAndIterateOp(data);
54-
rewriter.setInsertionPointToStart(inputLoops.getIterateBlock());
55-
{
56-
// Get a child IndexExpr context.
57-
IndexExprScope childScope(&rewriter, shapeHelper.scope);
58-
KrnlBuilder createKrnl(rewriter, loc);
59-
60-
// Get read/write indices.
61-
SmallVector<IndexExpr, 4> readIndices;
62-
SmallVector<IndexExpr, 4> writeIndices;
63-
for (decltype(rank) i = 0; i < rank; ++i) {
64-
Value readVal = inputLoops.getInductionVar(i);
65-
Value writeVal =
66-
inputLoops.getInductionVar(ArrayAttrIntVal(permAttr, i));
67-
readIndices.emplace_back(DimIndexExpr(readVal));
68-
writeIndices.emplace_back(DimIndexExpr(writeVal));
69-
}
70-
71-
// Copy data.
72-
Value loadData = createKrnl.loadIE(data, readIndices);
73-
createKrnl.storeIE(loadData, alloc, writeIndices);
74-
}
49+
rewriter, op, memRefType, loc, shapeHelper.dimsForOutput());
50+
51+
KrnlBuilder createKrnl(rewriter, loc);
52+
ValueRange loopDef = createKrnl.defineLoops(rank);
53+
SmallVector<IndexExpr, 4> lbs(rank, LiteralIndexExpr(0));
54+
55+
MemRefBoundsIndexCapture dataBounds(data);
56+
SmallVector<IndexExpr, 4> ubs;
57+
dataBounds.getDimList(ubs);
58+
59+
createKrnl.iterateIE(loopDef, loopDef, lbs, ubs,
60+
[&](KrnlBuilder &createKrnl, ValueRange indices) {
61+
// Compute the indices used by the load operation.
62+
SmallVector<IndexExpr, 4> storeIndices;
63+
for (uint64_t i = 0; i < rank; ++i) {
64+
Value index = indices[ArrayAttrIntVal(permAttr, i)];
65+
storeIndices.emplace_back(DimIndexExpr(index));
66+
}
67+
68+
Value loadData = createKrnl.load(data, indices);
69+
createKrnl.storeIE(loadData, alloc, storeIndices);
70+
});
7571

7672
rewriter.replaceOp(op, alloc);
7773

src/Dialect/ONNX/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ add_onnx_mlir_library(OMONNXOps
2424
ShapeInference/ArgMax.cpp
2525
ShapeInference/AveragePool.cpp
2626
ShapeInference/CategoryMapper.cpp
27+
ShapeInference/Clip.cpp
2728
ShapeInference/Compress.cpp
2829
ShapeInference/Concat.cpp
2930
ShapeInference/Conv.cpp

0 commit comments

Comments
 (0)