diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index d07ebbacc604..857859384058 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -256,6 +256,32 @@ def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2],
   ];
 }
 
+def LLVM_MemsetInlineOp : LLVM_ZeroResultIntrOp<"memset.inline", [0, 2],
+    [DeclareOpInterfaceMethods<PromotableMemOpInterface>,
+     DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>,
+     DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>],
+    /*requiresAccessGroup=*/1, /*requiresAliasAnalysis=*/1,
+    /*requiresOpBundles=*/0, /*immArgPositions=*/[2, 3],
+    /*immArgAttrNames=*/["len", "isVolatile"]> {
+  dag args = (ins Arg<LLVM_AnyPointer, "", [MemWrite]>:$dst,
+                  I8:$val, APIntAttr:$len, I1Attr:$isVolatile);
+  // Append the alias attributes defined by LLVM_IntrOpBase.
+  let arguments = !con(args, aliasAttrs);
+  let builders = [
+    OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len,
+               "bool":$isVolatile), [{
+      build($_builder, $_state, dst, val, len,
+            $_builder.getBoolAttr(isVolatile));
+    }]>,
+    OpBuilder<(ins "Value":$dst, "Value":$val, "IntegerAttr":$len,
+               "IntegerAttr":$isVolatile), [{
+      build($_builder, $_state, dst, val, len, isVolatile,
+            /*access_groups=*/nullptr, /*alias_scopes=*/nullptr,
+            /*noalias_scopes=*/nullptr, /*tbaa=*/nullptr);
+    }]>
+  ];
+}
+
 def LLVM_NoAliasScopeDeclOp
     : LLVM_ZeroResultIntrOp<"experimental.noalias.scope.decl"> {
   let arguments = (ins LLVM_AliasScopeAttr:$scope);
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
index cff16afc73af..a59900745d02 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInterfaces.cpp
@@ -94,6 +94,10 @@ SmallVector<Value> mlir::LLVM::MemsetOp::getAccessedOperands() {
   return {getDst()};
 }
 
+SmallVector<Value> mlir::LLVM::MemsetInlineOp::getAccessedOperands() {
+  return {getDst()};
+}
+
 SmallVector<Value> mlir::LLVM::CallOp::getAccessedOperands() {
   return llvm::to_vector(
       llvm::make_filter_range(getArgOperands(), [](Value arg) {
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 5dc506c14ef9..8b164cf59a07 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -904,6 +904,76 @@ std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemcpyInlineOp op) {
   return memIntrLen.getZExtValue();
 }
 
+/// Returns the length of the given memory intrinsic in bytes if it can be known
+/// at compile-time on a best-effort basis, nothing otherwise.
+/// Because MemsetInlineOp has its length encoded as an attribute, this requires
+/// specialized handling.
+template <>
+std::optional<uint64_t> getStaticMemIntrLen(LLVM::MemsetInlineOp op) {
+  APInt memIntrLen = op.getLen();
+  if (memIntrLen.getBitWidth() > 64)
+    return {};
+  return memIntrLen.getZExtValue();
+}
+
+/// Returns an integer attribute representing the length of a memset intrinsic.
+template <class MemsetIntr>
+IntegerAttr createMemsetLenAttr(MemsetIntr op) {
+  IntegerAttr memsetLenAttr;
+  bool successfulMatch =
+      matchPattern(op.getLen(), m_Constant(&memsetLenAttr));
+  (void)successfulMatch;
+  assert(successfulMatch);
+  return memsetLenAttr;
+}
+
+/// Returns an integer attribute representing the length of a memset intrinsic.
+/// Because MemsetInlineOp has its length encoded as an attribute, this requires
+/// specialized handling.
+template <>
+IntegerAttr createMemsetLenAttr(LLVM::MemsetInlineOp op) {
+  return op.getLenAttr();
+}
+
+/// Creates a memset intrinsic that matches the `toReplace` intrinsic
+/// using the provided parameters. There are template specializations for
+/// MemsetOp and MemsetInlineOp.
+template <class MemsetIntr>
+void createMemsetIntr(OpBuilder &builder, MemsetIntr toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index);
+
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetOp toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index) {
+  Value newMemsetSizeValue =
+      builder
+          .create<LLVM::ConstantOp>(
+              toReplace.getLen().getLoc(),
+              IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
+          .getResult();
+
+  builder.create<LLVM::MemsetOp>(toReplace.getLoc(), subslots.at(index).ptr,
+                                 toReplace.getVal(), newMemsetSizeValue,
+                                 toReplace.getIsVolatile());
+}
+
+template <>
+void createMemsetIntr(OpBuilder &builder, LLVM::MemsetInlineOp toReplace,
+                      IntegerAttr memsetLenAttr, uint64_t newMemsetSize,
+                      DenseMap<Attribute, MemorySlot> &subslots,
+                      Attribute index) {
+  auto newMemsetSizeValue =
+      IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize);
+
+  builder.create<LLVM::MemsetInlineOp>(
+      toReplace.getLoc(), subslots.at(index).ptr, toReplace.getVal(),
+      newMemsetSizeValue, toReplace.getIsVolatile());
+}
+
 } // namespace
 
 /// Returns whether one can be sure the memory intrinsic does not write outside
@@ -931,23 +1001,37 @@ static bool areAllIndicesI32(const DestructurableMemorySlot &slot) {
 }
 
 //===----------------------------------------------------------------------===//
-// Interfaces for memset
+// Interfaces for memset and memset.inline
 //===----------------------------------------------------------------------===//
 
-bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
+template <class MemsetIntr>
+static bool memsetCanRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
+                            SmallPtrSetImpl<Attribute> &usedIndices,
+                            SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+                            const DataLayout &dataLayout) {
+  if (&slot.elemType.getDialect() != op.getOperation()->getDialect())
+    return false;
 
-bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
-  return getDst() == slot.ptr;
+  if (op.getIsVolatile())
+    return false;
+
+  if (!cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap())
+    return false;
+
+  if (!areAllIndicesI32(slot))
+    return false;
+
+  return definitelyWritesOnlyWithinSlot(op, slot, dataLayout);
 }
 
-Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
-                                Value reachingDef,
-                                const DataLayout &dataLayout) {
+template <class MemsetIntr>
+static Value memsetGetStored(MemsetIntr op, const MemorySlot &slot,
+                             OpBuilder &builder) {
   // TODO: Support non-integer types.
   return TypeSwitch<Type, Value>(slot.elemType)
       .Case([&](IntegerType intType) -> Value {
         if (intType.getWidth() == 8)
-          return getVal();
+          return op.getVal();
 
         assert(intType.getWidth() % 8 == 0);
 
@@ -955,14 +1039,14 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
         // or-ing it with the previous value.
         uint64_t coveredBits = 8;
         Value currentValue =
-            builder.create<LLVM::ZExtOp>(getLoc(), intType, getVal());
+            builder.create<LLVM::ZExtOp>(op.getLoc(), intType, op.getVal());
         while (coveredBits < intType.getWidth()) {
-          Value shiftBy =
-              builder.create<LLVM::ConstantOp>(getLoc(), intType, coveredBits);
+          Value shiftBy = builder.create<LLVM::ConstantOp>(op.getLoc(), intType,
+                                                           coveredBits);
           Value shifted =
-              builder.create<LLVM::ShlOp>(getLoc(), currentValue, shiftBy);
+              builder.create<LLVM::ShlOp>(op.getLoc(), currentValue, shiftBy);
           currentValue =
-              builder.create<LLVM::OrOp>(getLoc(), currentValue, shifted);
+              builder.create<LLVM::OrOp>(op.getLoc(), currentValue, shifted);
           coveredBits *= 2;
         }
@@ -974,10 +1058,12 @@ Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
       });
 }
 
-bool LLVM::MemsetOp::canUsesBeRemoved(
-    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
-    SmallVectorImpl<OpOperand *> &newBlockingUses,
-    const DataLayout &dataLayout) {
+template <class MemsetIntr>
+static bool
+memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot,
+                       const SmallPtrSetImpl<OpOperand *> &blockingUses,
+                       SmallVectorImpl<OpOperand *> &newBlockingUses,
+                       const DataLayout &dataLayout) {
   // TODO: Support non-integer types.
   bool canConvertType =
       TypeSwitch<Type, bool>(slot.elemType)
@@ -988,62 +1074,28 @@ bool LLVM::MemsetOp::canUsesBeRemoved(
   if (!canConvertType)
     return false;
 
-  if (getIsVolatile())
+  if (op.getIsVolatile())
     return false;
 
-  return getStaticMemIntrLen(*this) == dataLayout.getTypeSize(slot.elemType);
-}
-
-DeletionKind LLVM::MemsetOp::removeBlockingUses(
-    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
-    OpBuilder &builder, Value reachingDefinition,
-    const DataLayout &dataLayout) {
-  return DeletionKind::Delete;
-}
-
-LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses(
-    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
-    const DataLayout &dataLayout) {
-  return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+  return getStaticMemIntrLen(op) == dataLayout.getTypeSize(slot.elemType);
 }
 
-bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
-                               SmallPtrSetImpl<Attribute> &usedIndices,
-                               SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
-                               const DataLayout &dataLayout) {
-  if (&slot.elemType.getDialect() != getOperation()->getDialect())
-    return false;
-
-  if (getIsVolatile())
-    return false;
-
-  if (!cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap())
-    return false;
-
-  if (!areAllIndicesI32(slot))
-    return false;
-
-  return definitelyWritesOnlyWithinSlot(*this, slot, dataLayout);
-}
+template <class MemsetIntr>
+static DeletionKind
+memsetRewire(MemsetIntr op, const DestructurableMemorySlot &slot,
+             DenseMap<Attribute, MemorySlot> &subslots, OpBuilder &builder,
+             const DataLayout &dataLayout) {
 
-DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
-                                    DenseMap<Attribute, MemorySlot> &subslots,
-                                    OpBuilder &builder,
-                                    const DataLayout &dataLayout) {
   std::optional<DenseMap<Attribute, Type>> types =
       cast<DestructurableTypeInterface>(slot.elemType).getSubelementIndexMap();
 
-  IntegerAttr memsetLenAttr;
-  bool successfulMatch =
-      matchPattern(getLen(), m_Constant(&memsetLenAttr));
-  (void)successfulMatch;
-  assert(successfulMatch);
+  IntegerAttr memsetLenAttr = createMemsetLenAttr(op);
 
   bool packed = false;
   if (auto structType = dyn_cast<LLVM::LLVMStructType>(slot.elemType))
     packed = structType.isPacked();
 
-  Type i32 = IntegerType::get(getContext(), 32);
+  Type i32 = IntegerType::get(op.getContext(), 32);
   uint64_t memsetLen = memsetLenAttr.getValue().getZExtValue();
   uint64_t covered = 0;
   for (size_t i = 0; i < types->size(); i++) {
@@ -1063,16 +1115,8 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
     // Otherwise, only compute its offset within the original memset.
     if (subslots.contains(index)) {
       uint64_t newMemsetSize = std::min(memsetLen - covered, typeSize);
-
-      Value newMemsetSizeValue =
-          builder
-              .create<LLVM::ConstantOp>(
-                  getLen().getLoc(),
-                  IntegerAttr::get(memsetLenAttr.getType(), newMemsetSize))
-              .getResult();
-
-      builder.create<LLVM::MemsetOp>(getLoc(), subslots.at(index).ptr, getVal(),
-                                     newMemsetSizeValue, getIsVolatile());
+      createMemsetIntr(builder, op, memsetLenAttr, newMemsetSize, subslots,
+                       index);
     }
 
     covered += typeSize;
@@ -1081,6 +1125,103 @@ DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
   return DeletionKind::Delete;
 }
 
+bool LLVM::MemsetOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+bool LLVM::MemsetOp::storesTo(const MemorySlot &slot) {
+  return getDst() == slot.ptr;
+}
+
+Value LLVM::MemsetOp::getStored(const MemorySlot &slot, OpBuilder &builder,
+                                Value reachingDef,
+                                const DataLayout &dataLayout) {
+  return memsetGetStored(*this, slot, builder);
+}
+
+bool LLVM::MemsetOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses,
+                                dataLayout);
+}
+
+DeletionKind LLVM::MemsetOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder, Value reachingDefinition,
+    const DataLayout &dataLayout) {
+  return DeletionKind::Delete;
+}
+
+LogicalResult LLVM::MemsetOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+    const DataLayout &dataLayout) {
+  return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+}
+
+bool LLVM::MemsetOp::canRewire(const DestructurableMemorySlot &slot,
+                               SmallPtrSetImpl<Attribute> &usedIndices,
+                               SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+                               const DataLayout &dataLayout) {
+  return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed,
+                         dataLayout);
+}
+
+DeletionKind LLVM::MemsetOp::rewire(const DestructurableMemorySlot &slot,
+                                    DenseMap<Attribute, MemorySlot> &subslots,
+                                    OpBuilder &builder,
+                                    const DataLayout &dataLayout) {
+  return memsetRewire(*this, slot, subslots, builder, dataLayout);
+}
+
+bool LLVM::MemsetInlineOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+bool LLVM::MemsetInlineOp::storesTo(const MemorySlot &slot) {
+  return getDst() == slot.ptr;
+}
+
+Value LLVM::MemsetInlineOp::getStored(const MemorySlot &slot,
+                                      OpBuilder &builder, Value reachingDef,
+                                      const DataLayout &dataLayout) {
+  return memsetGetStored(*this, slot, builder);
+}
+
+bool LLVM::MemsetInlineOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses,
+    const DataLayout &dataLayout) {
+  return memsetCanUsesBeRemoved(*this, slot, blockingUses, newBlockingUses,
+                                dataLayout);
+}
+
+DeletionKind LLVM::MemsetInlineOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder, Value reachingDefinition,
+    const DataLayout &dataLayout) {
+  return DeletionKind::Delete;
+}
+
+LogicalResult LLVM::MemsetInlineOp::ensureOnlySafeAccesses(
+    const MemorySlot &slot, SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+    const DataLayout &dataLayout) {
+  return success(definitelyWritesOnlyWithinSlot(*this, slot, dataLayout));
+}
+
+bool LLVM::MemsetInlineOp::canRewire(
+    const DestructurableMemorySlot &slot,
+    SmallPtrSetImpl<Attribute> &usedIndices,
+    SmallVectorImpl<MemorySlot> &mustBeSafelyUsed,
+    const DataLayout &dataLayout) {
+  return memsetCanRewire(*this, slot, usedIndices, mustBeSafelyUsed,
+                         dataLayout);
+}
+
+DeletionKind
+LLVM::MemsetInlineOp::rewire(const DestructurableMemorySlot &slot,
+                             DenseMap<Attribute, MemorySlot> &subslots,
+                             OpBuilder &builder, const DataLayout &dataLayout) {
+  return memsetRewire(*this, slot, subslots, builder, dataLayout);
+}
+
 //===----------------------------------------------------------------------===//
 // Interfaces for memcpy/memmove
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
index 4fc80a87f20d..646667505a37 100644
--- a/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/mem2reg-intrinsics.mlir
@@ -23,6 +23,28 @@ llvm.func @basic_memset(%memset_value: i8) -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @basic_memset_inline
+// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
+llvm.func @basic_memset_inline(%memset_value: i8) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4 : i32}> : (!llvm.ptr, i8) -> ()
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i32
+  // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]]
+  // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]]
+  // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32
+  // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]]
+  // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]]
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[VALUE_32]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @basic_memset_constant
 llvm.func @basic_memset_constant() -> i32 {
   %0 = llvm.mlir.constant(1 : i32) : i32
@@ -45,6 +67,27 @@ llvm.func @basic_memset_constant() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @basic_memset_inline_constant
+llvm.func @basic_memset_inline_constant() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[C42:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: %[[VALUE_42:.*]] = llvm.zext %[[C42]] : i8 to i32
+  // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i32) : i32
+  // CHECK: %[[SHIFTED_42:.*]] = llvm.shl %[[VALUE_42]], %[[C8]] : i32
+  // CHECK: %[[OR0:.*]] = llvm.or %[[VALUE_42]], %[[SHIFTED_42]] : i32
+  // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i32) : i32
+  // CHECK: %[[SHIFTED:.*]] = llvm.shl %[[OR0]], %[[C16]] : i32
+  // CHECK: %[[RES:.*]] = llvm.or %[[OR0]], %[[SHIFTED]] : i32
+  // CHECK: llvm.return %[[RES]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @exotic_target_memset
 // CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
 llvm.func @exotic_target_memset(%memset_value: i8) -> i40 {
@@ -71,6 +114,31 @@ llvm.func @exotic_target_memset(%memset_value: i8) -> i40 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @exotic_target_memset_inline
+// CHECK-SAME: (%[[MEMSET_VALUE:.*]]: i8)
+llvm.func @exotic_target_memset_inline(%memset_value: i8) -> i40 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i40 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 5}> : (!llvm.ptr, i8) -> ()
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // CHECK: %[[VALUE_8:.*]] = llvm.zext %[[MEMSET_VALUE]] : i8 to i40
+  // CHECK: %[[C8:.*]] = llvm.mlir.constant(8 : i40) : i40
+  // CHECK: %[[SHIFTED_8:.*]] = llvm.shl %[[VALUE_8]], %[[C8]]
+  // CHECK: %[[VALUE_16:.*]] = llvm.or %[[VALUE_8]], %[[SHIFTED_8]]
+  // CHECK: %[[C16:.*]] = llvm.mlir.constant(16 : i40) : i40
+  // CHECK: %[[SHIFTED_16:.*]] = llvm.shl %[[VALUE_16]], %[[C16]]
+  // CHECK: %[[VALUE_32:.*]] = llvm.or %[[VALUE_16]], %[[SHIFTED_16]]
+  // CHECK: %[[C32:.*]] = llvm.mlir.constant(32 : i40) : i40
+  // CHECK: %[[SHIFTED_COMPL:.*]] = llvm.shl %[[VALUE_32]], %[[C32]]
+  // CHECK: %[[VALUE_COMPL:.*]] = llvm.or %[[VALUE_32]], %[[SHIFTED_COMPL]]
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i40
+  // CHECK: llvm.return %[[VALUE_COMPL]] : i40
+  llvm.return %2 : i40
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @no_volatile_memset
 llvm.func @no_volatile_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -89,6 +157,22 @@ llvm.func @no_volatile_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @no_volatile_memset_inline
+llvm.func @no_volatile_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 4 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 4}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @no_partial_memset
 llvm.func @no_partial_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -107,6 +191,22 @@ llvm.func @no_partial_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @no_partial_memset_inline
+llvm.func @no_partial_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @no_overflowing_memset
 llvm.func @no_overflowing_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -125,6 +225,22 @@ llvm.func @no_overflowing_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @no_overflowing_memset_inline
+llvm.func @no_overflowing_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %2 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset
 llvm.func @only_byte_aligned_integers_memset() -> i10 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -143,6 +259,22 @@ llvm.func @only_byte_aligned_integers_memset() -> i10 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @only_byte_aligned_integers_memset_inline
+llvm.func @only_byte_aligned_integers_memset_inline() -> i10 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i10
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i10 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}>
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 2}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i10
+  llvm.return %2 : i10
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @basic_memcpy
 // CHECK-SAME: (%[[SOURCE:.*]]: !llvm.ptr)
 llvm.func @basic_memcpy(%source: !llvm.ptr) -> i32 {
diff --git a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
index ba73025814cc..6dc8a97884ee 100644
--- a/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
+++ b/mlir/test/Dialect/LLVMIR/sroa-intrinsics.mlir
@@ -21,6 +21,25 @@ llvm.func @memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline
+llvm.func @memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 16 bytes means it will span over the first 4 i32 entries.
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 16}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_partial llvm.func @memset_partial() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -43,6 +62,26 @@ llvm.func @memset_partial() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_partial +llvm.func @memset_inline_partial() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + // After SROA, only the second i32 will be actually used. As the memset writes up + // to half of it, only 2 bytes will be set. + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 6 bytes means it will span over the first i32 and half of the second i32. + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 2 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_full llvm.func @memset_full() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -64,6 +103,25 @@ llvm.func @memset_full() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_full +llvm.func @memset_inline_full() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + // After SROA, only one i32 will be actually used, so only 4 bytes will be set. + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 40 bytes means it will span over the entire array. 
+ // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 40}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_too_much llvm.func @memset_too_much() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -84,6 +142,24 @@ llvm.func @memset_too_much() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_too_much +llvm.func @memset_inline_too_much() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32> + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // 41 bytes means it will span over the entire array, and then some. + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 41 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 41}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memset_no_volatile llvm.func @memset_no_volatile() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -103,6 +179,23 @@ llvm.func @memset_no_volatile() -> i32 { // ----- +// CHECK-LABEL: llvm.func @memset_inline_no_volatile +llvm.func @memset_inline_no_volatile() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.array<10 x i32> + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = true, len = 16 : i64}> + "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = true, len = 16}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + %3 = llvm.load %2 : !llvm.ptr -> i32 + llvm.return %3 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @indirect_memset llvm.func @indirect_memset() -> i32 { // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 @@ -123,6 +216,24 @@ llvm.func @indirect_memset() -> i32 { // ----- +// CHECK-LABEL: llvm.func @indirect_memset_inline +llvm.func @indirect_memset_inline() -> i32 { + // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32 + // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr + %memset_value = llvm.mlir.constant(42 : i8) : i8 + // This memset will only cover the selected element. 
+  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 4}> : (!llvm.ptr, i8) -> ()
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @invalid_indirect_memset
 llvm.func @invalid_indirect_memset() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -144,6 +255,25 @@ llvm.func @invalid_indirect_memset() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @invalid_indirect_memset_inline
+llvm.func @invalid_indirect_memset_inline() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_LEN]] x !llvm.struct<"foo", (i32, i32)>
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // This memset will go slightly beyond one of the elements.
+  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 0]
+  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, i32)>
+  // CHECK: "llvm.intr.memset.inline"(%[[GEP]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 6 : i64}>
+  "llvm.intr.memset.inline"(%2, %memset_value) <{isVolatile = false, len = 6}> : (!llvm.ptr, i8) -> ()
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  llvm.return %3 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_double_use
 llvm.func @memset_double_use() -> i32 {
   // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -176,6 +306,35 @@ llvm.func @memset_double_use() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_double_use
+llvm.func @memset_inline_double_use() -> i32 {
+  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // We expect two generated memsets, one for each field.
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // After SROA, only one i32 will be actually used, so only 4 bytes will be set.
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // 8 bytes means it will span over the two i32 entries.
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  %4 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i32, f32)>
+  %5 = llvm.load %4 : !llvm.ptr -> f32
+  // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+  %6 = llvm.bitcast %5 : f32 to i32
+  %7 = llvm.add %3, %6 : i32
+  llvm.return %7 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_considers_alignment
 llvm.func @memset_considers_alignment() -> i32 {
   // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -207,6 +366,35 @@ llvm.func @memset_considers_alignment() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_considers_alignment
+llvm.func @memset_inline_considers_alignment() -> i32 {
+  // CHECK-DAG: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-DAG: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK-DAG: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // 8 bytes means it will span over the i8 and the i32 entry.
+  // Because of padding, the f32 entry will not be touched.
+  // Even though the two i32 are used, only one memset should be generated,
+  // as the second i32 is not touched by the initial memset.
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> ()
+  %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
+  %3 = llvm.load %2 : !llvm.ptr -> i32
+  %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (i8, i32, f32)>
+  %5 = llvm.load %4 : !llvm.ptr -> f32
+  // We use this exotic bitcast to use the f32 easily. Semantics do not matter here.
+  %6 = llvm.bitcast %5 : f32 to i32
+  %7 = llvm.add %3, %6 : i32
+  llvm.return %7 : i32
+}
+
+// -----
+
 // CHECK-LABEL: llvm.func @memset_considers_packing
 llvm.func @memset_considers_packing() -> i32 {
   // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
@@ -239,6 +427,35 @@ llvm.func @memset_considers_packing() -> i32 {
 
 // -----
 
+// CHECK-LABEL: llvm.func @memset_inline_considers_packing
+llvm.func @memset_inline_considers_packing() -> i32 {
+  // CHECK: %[[ALLOCA_LEN:.*]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA_FLOAT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x f32
+  // CHECK: %[[ALLOCA_INT:.*]] = llvm.alloca %[[ALLOCA_LEN]] x i32
+  // CHECK: %[[MEMSET_VALUE:.*]] = llvm.mlir.constant(42 : i8) : i8
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i8, i32, f32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %memset_value = llvm.mlir.constant(42 : i8) : i8
+  // Now all fields are touched by the memset.
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // After SROA, only 32-bit values will be actually used, so only 4 bytes will be set.
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_INT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 4 : i64}>
+  // CHECK: "llvm.intr.memset.inline"(%[[ALLOCA_FLOAT]], %[[MEMSET_VALUE]]) <{isVolatile = false, len = 3 : i64}>
+  // CHECK-NOT: "llvm.intr.memset.inline"
+  // 8 bytes means it will span over all the fields, because there is no padding as the struct is packed.
+ "llvm.intr.memset.inline"(%1, %memset_value) <{isVolatile = false, len = 8}> : (!llvm.ptr, i8) -> () + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)> + %3 = llvm.load %2 : !llvm.ptr -> i32 + %4 = llvm.getelementptr %1[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", packed (i8, i32, f32)> + %5 = llvm.load %4 : !llvm.ptr -> f32 + // We use this exotic bitcast to use the f32 easily. Semantics do not matter here. + %6 = llvm.bitcast %5 : f32 to i32 + %7 = llvm.add %3, %6 : i32 + llvm.return %7 : i32 +} + +// ----- + // CHECK-LABEL: llvm.func @memcpy_dest // CHECK-SAME: (%[[OTHER_ARRAY:.*]]: !llvm.ptr) llvm.func @memcpy_dest(%other_array: !llvm.ptr) -> i32 { diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index 606b11175f57..e857e252ff08 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -505,6 +505,10 @@ define void @memmove_test(i32 %0, ptr %1, ptr %2) { define void @memset_test(i32 %0, ptr %1, i8 %2) { ; CHECK: "llvm.intr.memset"(%{{.*}}, %{{.*}}, %{{.*}}) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> () call void @llvm.memset.p0.i32(ptr %1, i8 %2, i32 %0, i1 false) + ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i64}> : (!llvm.ptr, i8) -> () + call void @llvm.memset.inline.p0.i64(ptr %1, i8 %2, i64 10, i1 false) + ; CHECK: "llvm.intr.memset.inline"(%{{.*}}, %{{.*}}) <{isVolatile = false, len = 10 : i32}> : (!llvm.ptr, i8) -> () + call void @llvm.memset.inline.p0.i32(ptr %1, i8 %2, i32 10, i1 false) ret void } diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index cb712eb4e126..9d45f219cf74 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -533,6 +533,10 @@ llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) { %i1 = llvm.mlir.constant(false) : i1 // CHECK: call void @llvm.memset.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 %{{.*}}, i1 false "llvm.intr.memset"(%arg2, %arg3, %arg0) <{isVolatile = false}> : (!llvm.ptr, i8, i32) -> () + // CHECK: call void @llvm.memset.inline.p0.i32(ptr %{{.*}}, i8 %{{.*}}, i32 10, i1 true + "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i32}> : (!llvm.ptr, i8) -> () + // CHECK: call void @llvm.memset.inline.p0.i64(ptr %{{.*}}, i8 %{{.*}}, i64 10, i1 true + "llvm.intr.memset.inline"(%arg2, %arg3) <{isVolatile = true, len = 10 : i64}> : (!llvm.ptr, i8) -> () llvm.return }