diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0459941fe05cd..0dc513d8e65b7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1819,6 +1819,10 @@ class TargetTransformInfo {
   /// \return The maximum number of function arguments the target supports.
   unsigned getMaxNumArgs() const;
 
+  /// \return The number of bytes to append to a global array of \p Size bytes
+  /// so that it ends on an alignment boundary. Default is 0 (no padding).
+  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
   /// @}
 
 private:
@@ -2225,6 +2229,8 @@ class TargetTransformInfo::Concept {
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
   virtual bool hasArmWideBranch(bool Thumb) const = 0;
   virtual unsigned getMaxNumArgs() const = 0;
+  virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
+                                               Type *ArrayType) const = 0;
 };
 
 template <typename T>
@@ -3026,6 +3032,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   unsigned getMaxNumArgs() const override {
     return Impl.getMaxNumArgs();
   }
+
+  unsigned getNumBytesToPadGlobalArray(unsigned Size,
+                                       Type *ArrayType) const override {
+    return Impl.getNumBytesToPadGlobalArray(Size, ArrayType);
+  }
 };
 
 template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index dbdfb4d8cdfa3..0b7792f89a05c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1006,6 +1006,10 @@ class TargetTransformInfoImplBase {
 
   unsigned getMaxNumArgs() const { return UINT_MAX; }
 
+  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const {
+    return 0;
+  }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a47462b61e03b..6070473363761 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1383,6 +1383,12 @@ bool TargetTransformInfo::isVectorShiftByScalarCheap(Type *Ty) const {
   return TTIImpl->isVectorShiftByScalarCheap(Ty);
 }
 
+unsigned
+TargetTransformInfo::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  return TTIImpl->getNumBytesToPadGlobalArray(Size, ArrayType);
+}
+
 TargetTransformInfo::Concept::~Concept() = default;
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 835ae98efb852..9f6e5e5ab1421 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -56,6 +56,10 @@ static cl::opt<bool> AllowWLSLoops("allow-arm-wlsloops", cl::Hidden,
                                    cl::init(true),
                                    cl::desc("Enable the generation of WLS loops"));
 
+static cl::opt<bool> UseWidenGlobalArrays(
+    "widen-global-strings", cl::Hidden, cl::init(true),
+    cl::desc("Enable the widening of global strings to alignment boundaries"));
+
 extern cl::opt<TailPredication::Mode> EnableTailPredication;
 
 extern cl::opt<bool> EnableMaskedGatherScatters;
@@ -2805,3 +2809,35 @@ bool ARMTTIImpl::isProfitableToSinkOperands(Instruction *I,
   }
   return true;
 }
+
+/// Return how many zero bytes to append to a global integer array of \p Size
+/// bytes so that it ends on a 4-byte boundary. Returns 0 when no padding is
+/// wanted: disabled, not an integer array, aligned, or above inline limit.
+unsigned ARMTTIImpl::getNumBytesToPadGlobalArray(unsigned Size,
+                                                 Type *ArrayType) const {
+  if (!UseWidenGlobalArrays) {
+    LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
+    return 0;
+  }
+
+  // Don't modify non-integer array types
+  if (!ArrayType || !ArrayType->isArrayTy() ||
+      !ArrayType->getArrayElementType()->isIntegerTy())
+    return 0;
+
+  // We pad to 4 byte boundaries
+  if (Size % 4 == 0)
+    return 0;
+
+  unsigned NumBytesToPad = 4 - (Size % 4);
+  unsigned NewSize = Size + NumBytesToPad;
+
+  // Max number of bytes that memcpy allows for lowering to load/stores before
+  // it uses library function (__aeabi_memcpy).
+  unsigned MaxMemIntrinsicSize = getMaxMemIntrinsicInlineSizeThreshold();
+
+  if (NewSize > MaxMemIntrinsicSize)
+    return 0;
+
+  return NumBytesToPad;
+}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b0a75134ee02b..3a4f940088b2e 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -337,6 +337,9 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
 
   bool isProfitableToSinkOperands(Instruction *I,
                                   SmallVectorImpl<Use *> &Ops) const;
+
+  unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
+
   /// @}
 };
 
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index aae4926e027ff..4647c65a5c850 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -92,6 +92,8 @@ STATISTIC(NumInternalFunc, "Number of internal functions");
 STATISTIC(NumColdCC, "Number of functions marked coldcc");
 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
 STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
+STATISTIC(NumGlobalArraysPadded,
+          "Number of global arrays padded to alignment boundary");
 
 static cl::opt<bool>
     EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2029,6 +2031,195 @@ OptimizeFunctions(Module &M,
   return Changed;
 }
 
+/// Return true if \p CI is non-null and calls the llvm.memcpy intrinsic.
+static bool callInstIsMemcpy(CallInst *CI) {
+  if (!CI)
+    return false;
+
+  Function *F = CI->getCalledFunction();
+  if (!F || !F->isIntrinsic() || F->getIntrinsicID() != Intrinsic::memcpy)
+    return false;
+
+  return true;
+}
+
+/// Return true if memcpy \p CI is known non-volatile and its destination is a
+/// static alloca of array type, i.e. a destination that may be widened.
+static bool destArrayCanBeWidened(CallInst *CI) {
+  auto *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+  auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+
+  if (!Alloca || !IsVolatile || IsVolatile->isOne())
+    return false;
+
+  if (!Alloca->isStaticAlloca())
+    return false;
+
+  if (!Alloca->getAllocatedType()->isArrayTy())
+    return false;
+
+  return true;
+}
+
+/// Create a replacement for \p OldVar in the module of \p F. Its initializer is
+/// the raw data of \p OldVar extended with zero bytes and cut to
+/// \p NumBytesToCopy + \p NumBytesToPad bytes. Returns nullptr if \p OldVar has
+/// no ConstantDataArray initializer.
+static GlobalVariable *widenGlobalVariable(GlobalVariable *OldVar, Function *F,
+                                           unsigned NumBytesToPad,
+                                           unsigned NumBytesToCopy) {
+  if (!OldVar->hasInitializer())
+    return nullptr;
+
+  ConstantDataArray *DataArray =
+      dyn_cast<ConstantDataArray>(OldVar->getInitializer());
+  if (!DataArray)
+    return nullptr;
+
+  // Update to be word aligned (memcpy(...,X,...))
+  // create replacement with padded null bytes.
+  StringRef Data = DataArray->getRawDataValues();
+  std::vector<uint8_t> StrData(Data.begin(), Data.end());
+  for (unsigned int p = 0; p < NumBytesToPad; p++)
+    StrData.push_back('\0');
+  auto Arr = ArrayRef(StrData.data(), NumBytesToCopy + NumBytesToPad);
+  // Create new padded version of global variable.
+  Constant *SourceReplace = ConstantDataArray::get(F->getContext(), Arr);
+  GlobalVariable *NewGV = new GlobalVariable(
+      *(F->getParent()), SourceReplace->getType(), true, OldVar->getLinkage(),
+      SourceReplace, SourceReplace->getName());
+  // Copy any other attributes from original global variable
+  // e.g. unnamed_addr
+  NewGV->copyAttributesFrom(OldVar);
+  NewGV->takeName(OldVar);
+  return NewGV;
+}
+
+/// Replace the alloca that memcpy \p CI writes to, if any, with a new array
+/// alloca of the same element type and alignment holding
+/// ceil((NumBytesToCopy + NumBytesToPad) / source element size) elements.
+static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad,
+                           const unsigned NumBytesToCopy,
+                           ConstantDataArray *SourceDataArray) {
+
+  auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0));
+  if (Alloca) {
+    unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+    unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad;
+    unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth);
+    // Update destination array to be word aligned (memcpy(X,...,...))
+    IRBuilder<> BuildAlloca(Alloca);
+    AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get(
+        Alloca->getAllocatedType()->getArrayElementType(), NumElementsToCopy));
+    NewAlloca->takeName(Alloca);
+    NewAlloca->setAlignment(Alloca->getAlign());
+    Alloca->replaceAllUsesWith(NewAlloca);
+    Alloca->eraseFromParent();
+  }
+}
+
+/// Replace \p SourceVar with a padded copy and widen every matching memcpy from
+/// it (destination alloca and length operand). Returns false, changing
+/// nothing, if the padded global cannot be created.
+static bool tryWidenGlobalArrayAndDests(Function *F, GlobalVariable *SourceVar,
+                                        const unsigned NumBytesToPad,
+                                        const unsigned NumBytesToCopy,
+                                        ConstantInt *BytesToCopyOp,
+                                        ConstantDataArray *SourceDataArray) {
+  auto *NewSourceGV =
+      widenGlobalVariable(SourceVar, F, NumBytesToPad, NumBytesToCopy);
+  if (!NewSourceGV)
+    return false;
+
+  // Update arguments of remaining uses that
+  // are memcpys.
+  for (auto *User : SourceVar->users()) {
+    auto *CI = dyn_cast<CallInst>(User);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    if (CI->getArgOperand(1) != SourceVar)
+      continue;
+
+    // Only widen copies matching the one validated by the caller: same constant
+    // length, destination with as many elements as the source. Widening any
+    // other memcpy could shrink its destination or change its byte count.
+    auto *Len = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!Len || Len->getZExtValue() != BytesToCopyOp->getZExtValue())
+      continue;
+    auto *Dest = cast<AllocaInst>(CI->getArgOperand(0));
+    if (Dest->getAllocatedType()->getArrayNumElements() !=
+        SourceDataArray->getType()->getNumElements())
+      continue;
+
+    widenDestArray(CI, NumBytesToPad, NumBytesToCopy, SourceDataArray);
+
+    // Use this call's own length type; memcpy overloads may differ (i32/i64).
+    CI->setArgOperand(
+        2, ConstantInt::get(Len->getType(), NumBytesToCopy + NumBytesToPad));
+  }
+  SourceVar->replaceAllUsesWith(NewSourceGV);
+
+  NumGlobalArraysPadded++;
+  return true;
+}
+
+/// If constant, local, unnamed_addr global \p GV is the source of a memcpy that
+/// copies every element into a static array alloca with the same element
+/// count, ask the target for a pad size and widen \p GV and its memcpy users.
+static bool tryWidenGlobalArraysUsedByMemcpy(
+    GlobalVariable *GV,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI) {
+
+  if (!GV->hasInitializer() || !GV->isConstant() || !GV->hasLocalLinkage() ||
+      !GV->hasGlobalUnnamedAddr())
+    return false;
+
+  for (auto *User : GV->users()) {
+    CallInst *CI = dyn_cast<CallInst>(User);
+    if (!callInstIsMemcpy(CI) || !destArrayCanBeWidened(CI))
+      continue;
+
+    // The function containing the call, not the callee: the callee is the
+    // llvm.memcpy declaration, which is the wrong function to query TTI for.
+    Function *F = CI->getFunction();
+
+    auto *BytesToCopyOp = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!BytesToCopyOp)
+      continue;
+
+    ConstantDataArray *SourceDataArray =
+        dyn_cast<ConstantDataArray>(GV->getInitializer());
+    if (!SourceDataArray)
+      continue;
+
+    unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue();
+
+    auto *Alloca = cast<AllocaInst>(CI->getArgOperand(0));
+    uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements();
+    uint64_t SZSize = SourceDataArray->getType()->getNumElements();
+    unsigned ElementByteWidth = SourceDataArray->getElementByteSize();
+    // Calculate the number of elements to copy while avoiding floored
+    // division of integers returning wrong values i.e. copying one byte
+    // from an array of i16 would yield 0 elements to copy as opposed to 1.
+    unsigned NumElementsToCopy = divideCeil(NumBytesToCopy, ElementByteWidth);
+
+    // For safety purposes lets add a constraint and only pad when
+    // NumElementsToCopy == destination array size ==
+    // source which is a constant
+    if (NumElementsToCopy != DZSize || DZSize != SZSize)
+      continue;
+
+    unsigned NumBytesToPad = GetTTI(*F).getNumBytesToPadGlobalArray(
+        NumBytesToCopy, SourceDataArray->getType());
+    if (NumBytesToPad) {
+      return tryWidenGlobalArrayAndDests(F, GV, NumBytesToPad, NumBytesToCopy,
+                                         BytesToCopyOp, SourceDataArray);
+    }
+  }
+  return false;
+}
+
 static bool
 OptimizeGlobalVars(Module &M,
                    function_ref<TargetTransformInfo &(Function &)> GetTTI,
@@ -2058,6 +2249,10 @@ OptimizeGlobalVars(Module &M,
       continue;
     }
 
+    // For global variable arrays used as the source of a memcpy
+    // we try to pad to nearest valid alignment boundary
+    Changed |= tryWidenGlobalArraysUsedByMemcpy(&GV, GetTTI);
+
     Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
   }
   return Changed;
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
new file mode 100644
index 0000000000000..ab04e0a5bc697
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-dest-non-array.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s
+
+@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1
+
+define void @memcpy_struct() {
+; CHECK-LABEL: define void @memcpy_struct() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING:%.*]] = alloca { i8, i8, i8 }, align 1
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca {i8, i8, i8}, align 1
+  %call1 = call i32 @bar(ptr nonnull %something)
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false)
+  ret void
+}
+
+
+@.i8_multi = private unnamed_addr constant [2 x [3 x i8]] [[3 x i8] [i8 1, i8 2, i8 3], [3 x i8] [i8 4, i8 5, i8 6]] , align 1
+
+define void @memcpy_array_multidimensional() {
+; CHECK-LABEL: define void @memcpy_array_multidimensional() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SOMETHING:%.*]] = alloca [2 x [3 x i8]], align 1
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %something = alloca [2 x [3 x i8]], align 1
+  %call1 = call i32 @bar(ptr nonnull %something)
+  call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8_multi, i32 3, i1 false)
+  ret void
+}
+
+declare i32 @bar(...)
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll new file mode 100644 index 0000000000000..f435ffdeed2c8 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-global-dest.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +; CHECK: [3 x i8] +@other = private unnamed_addr global [3 x i8] [i8 1, i8 2, i8 3] , align 1 +; CHECK: [4 x i8] +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 + +define void @memcpy_multiple() { +; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull @other) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [3 x i8], align 1 + %call1 = call i32 @bar(ptr nonnull %something) + %call2 = call i32 @bar(ptr nonnull @other) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) @other, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + ret void +} + +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll new file mode 100644 index 0000000000000..c7ca7271fd3d2 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-byte-array.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.i16 = private unnamed_addr constant [5 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5] , align 1 + +define void @memcpy_i16_array() { +; CHECK-LABEL: define void @memcpy_i16_array() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [6 x i16], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 12, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [5 x i16], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.i16, i32 10, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + ret void +} + + +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll new file mode 100644 index 0000000000000..3d9c42fe1f3dd --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-non-const-global.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.str = unnamed_addr global [3 x i8] c"12\00", align 1 + +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [3 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [3 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.str, i32 3, i1 false) + %call1 = call i32 @bar(ptr nonnull %something) + ret void +} + +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll new file mode 100644 index 0000000000000..e37925a78d2c3 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-string-multi-use.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 + +define void @memcpy_multiple() { +; CHECK-LABEL: define void @memcpy_multiple() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING2:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[SOMETHING1:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[SOMETHING3:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[SOMETHING3]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING2]]) +; CHECK-NEXT: [[CALL3:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING1]]) +; CHECK-NEXT: [[CALL4:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING3]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [3 x i8], align 1 + %something1 = alloca [3 x i8], align 1 + %something2 = alloca [3 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef 
nonnull align 1 dereferenceable(3) %something1, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %something2, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + %call3 = call i32 @bar(ptr nonnull %something1) + %call4 = call i32 @bar(ptr nonnull %something2) + ret void +} + +declare i32 @bar(...) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll new file mode 100644 index 0000000000000..8ea9e2804370e --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-1.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.str = private unnamed_addr constant [10 x i8] c"123456789\00", align 1 + +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [12 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 12, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [10 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) %something, ptr noundef nonnull align 1 dereferenceable(10) @.str, i32 10, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + ret void +} + +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll new file mode 100644 index 0000000000000..ad3620b14ea23 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-2.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 + +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [64 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) [[SOMETHING]], ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 64, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [62 x i8], align 1 + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(62) %something, ptr noundef nonnull align 1 dereferenceable(62) @.str, i32 62, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) + ret void +} + +declare i32 @bar(...) 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll new file mode 100644 index 0000000000000..b8e02c3f996da --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-lengths-dont-match.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s +; CHECK: [17 x i8] +@.str = private unnamed_addr constant [17 x i8] c"aaaaaaaaaaaaaaaa\00", align 1 + +; Function Attrs: nounwind +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [20 x i8], align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 1 [[SOMETHING]], ptr align 1 @.str, i32 17, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [20 x i8], align 1 + call void @llvm.lifetime.start(i64 20, ptr nonnull %something) #3 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 17, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) #3 + call void @llvm.lifetime.end(i64 20, ptr nonnull %something) #3 + ret void +} + +declare i32 @bar(...) 
#2 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll new file mode 100644 index 0000000000000..4ac31aa2f976d --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-more-than-64-bytes.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +; CHECK: [65 x i8] +; CHECK-NOT: [68 x i8] +@.str = private unnamed_addr constant [65 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzz\00", align 1 + +; Function Attrs: nounwind +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [65 x i8], align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 65, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 1 [[SOMETHING]], ptr align 1 @.str, i32 65, i1 false) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 65, ptr nonnull [[SOMETHING]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [65 x i8], align 1 + call void @llvm.lifetime.start(i64 65, ptr nonnull %something) #3 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %something, ptr align 1 @.str, i32 65, i1 false) + %call2 = call i32 @bar(ptr nonnull %something) #3 + call void @llvm.lifetime.end(i64 65, ptr nonnull %something) #3 + ret void +} + +declare i32 @bar(...) 
#2 diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll new file mode 100644 index 0000000000000..64f57884cd39e --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-ptrtoint.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@f.string1 = private unnamed_addr constant [45 x i8] c"The quick brown dog jumps over the lazy fox.\00", align 1 + +; Function Attrs: nounwind +define i32 @f() { +; CHECK-LABEL: define i32 @f() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[STRING1:%.*]] = alloca [48 x i8], align 1 +; CHECK-NEXT: [[POS:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TOKEN:%.*]] = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 45, ptr [[STRING1]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[STRING1]], ptr align 1 @f.string1, i32 48, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[POS]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TOKEN]]) +; CHECK-NEXT: [[CALL:%.*]] = call ptr @strchr(ptr [[STRING1]], i32 101) +; CHECK-NEXT: store ptr [[CALL]], ptr [[TOKEN]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TOKEN]], align 4 +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP1]] to i32 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STRING1]] to i32 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i32 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SUB_PTR_SUB]], 1 +; CHECK-NEXT: store i32 [[ADD]], ptr [[POS]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[POS]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TOKEN]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[POS]]) +; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(i64 45, ptr [[STRING1]]) +; CHECK-NEXT: ret i32 [[TMP2]] +; +entry: + %string1 = alloca [45 x i8], align 1 + %pos = alloca i32, align 4 + %token = alloca ptr, align 4 + call void @llvm.lifetime.start.p0i8(i64 45, ptr %string1) + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %string1, ptr align 1 @f.string1, i32 45, i1 false) + call void @llvm.lifetime.start.p0i8(i64 4, ptr %pos) + call void @llvm.lifetime.start.p0i8(i64 4, ptr %token) + %call = call ptr @strchr(ptr %string1, i32 101) + store ptr %call, ptr %token, align 4 + %0 = load ptr, ptr %token, align 4 + %sub.ptr.lhs.cast = ptrtoint ptr %0 to i32 + %sub.ptr.rhs.cast = ptrtoint ptr %string1 to i32 + %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %add = add nsw i32 %sub.ptr.sub, 1 + store i32 %add, ptr %pos, align 4 + %1 = load i32, ptr %pos, align 4 + call void @llvm.lifetime.end.p0i8(i64 4, ptr %token) + call void @llvm.lifetime.end.p0i8(i64 4, ptr %pos) + call void @llvm.lifetime.end.p0i8(i64 45, ptr %string1) + ret i32 %1 +} + +declare ptr @strchr(ptr, i32) diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll new file mode 100644 index 0000000000000..5367572704b14 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-struct-test.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s +%struct.P = type { i32, [13 x i8] } + +; CHECK-NOT: [16 x i8] +@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 + +; Function Attrs: nounwind +define i32 @main() { +; CHECK-LABEL: define i32 @main() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_P:%.*]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr nonnull [[P]]) +; CHECK-NEXT: store i32 10, 
ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [[STRUCT_P]], ptr [[P]], i32 0, i32 1, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[ARRAYDECAY]], ptr align 1 @.str, i32 13, i1 false) +; CHECK-NEXT: [[PUTS:%.*]] = call i32 @puts(ptr [[ARRAYDECAY]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr nonnull [[P]]) +; CHECK-NEXT: ret i32 0 +; +entry: + %p = alloca %struct.P, align 4 + call void @llvm.lifetime.start(i64 20, ptr nonnull %p) #2 + store i32 10, ptr %p, align 4, !tbaa !1 + %arraydecay = getelementptr inbounds %struct.P, ptr %p, i32 0, i32 1, i32 0 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 %arraydecay, ptr align 1 @.str, i32 13, i1 false) + %puts = call i32 @puts(ptr %arraydecay) + call void @llvm.lifetime.end(i64 20, ptr nonnull %p) #2 + ret i32 0 +} + +declare i32 @puts(ptr nocapture readonly) #2 + +!1 = !{!2, !3, i64 0} +!2 = !{!"P", !3, i64 0, !4, i64 4} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} +; CHECK: [[META1]] = !{!"P", [[META2]], i64 0, [[META3:![0-9]+]], i64 4} +; CHECK: [[META2]] = !{!"int", [[META3]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +;. 
diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll new file mode 100644 index 0000000000000..b735a77887423 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-strings-volatile.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +; CHECK-NOT: [64 x i8] +@.str = private unnamed_addr constant [62 x i8] c"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1 + +; Function Attrs: nounwind +define void @foo() { +; CHECK-LABEL: define void @foo() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SOMETHING:%.*]] = alloca [62 x i8], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [62 x i8], ptr [[SOMETHING]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 62, ptr nonnull [[TMP0]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 1 [[TMP0]], ptr align 1 @.str, i32 62, i1 true) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[TMP0]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 62, ptr nonnull [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %something = alloca [62 x i8], align 1 + %0 = getelementptr inbounds [62 x i8], ptr %something, i32 0, i32 0 + call void @llvm.lifetime.start(i64 62, ptr nonnull %0) #3 + call void @llvm.memcpy.p0i8.p0i8.i32(ptr align 1 nonnull %0, ptr align 1 @.str, i32 62, i1 true) + %call2 = call i32 @bar(ptr nonnull %0) #3 + call void @llvm.lifetime.end(i64 62, ptr nonnull %0) #3 + ret void +} + +declare i32 @bar(...) #2