From f89ba52f91b14dac3c24b3d93461426ebc1db222 Mon Sep 17 00:00:00 2001
From: youxiang
Date: Wed, 10 Apr 2019 22:11:18 -0700
Subject: [PATCH 1/2] test

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 0ed7d42675789..d1a8775ad88fe 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 # Intel Project for LLVM* technology
 
+
 ## Introduction
 
 Intel staging area for llvm.org contribution.

From 70901fc06d3c998184fee897a2c490b9829996b9 Mon Sep 17 00:00:00 2001
From: Min Gao
Date: Mon, 15 Apr 2019 15:49:48 -0700
Subject: [PATCH 2/2] lower struct memcpy draft

---
Review edits changed LowerMemcpy/CMakeLists.txt, LowerMemcpy.cpp and
LowerMemcpy.h; the blob ids on their index lines predate those edits.

 llvm/lib/Transforms/CMakeLists.txt            |   1 +
 .../lib/Transforms/LowerMemcpy/CMakeLists.txt |  20 ++
 .../Transforms/LowerMemcpy/LowerMemcpy.cpp    | 250 ++++++++++++++++++
 .../LowerMemcpy/LowerMemcpy.exports           |   0
 llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.h |  23 ++
 5 files changed, 294 insertions(+)
 create mode 100644 llvm/lib/Transforms/LowerMemcpy/CMakeLists.txt
 create mode 100644 llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.cpp
 create mode 100644 llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.exports
 create mode 100644 llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.h

diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt
index 74db9e53304da..18a1c07d4a046 100644
--- a/llvm/lib/Transforms/CMakeLists.txt
+++ b/llvm/lib/Transforms/CMakeLists.txt
@@ -8,3 +8,4 @@ add_subdirectory(Vectorize)
 add_subdirectory(Hello)
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
+add_subdirectory(LowerMemcpy)
diff --git a/llvm/lib/Transforms/LowerMemcpy/CMakeLists.txt b/llvm/lib/Transforms/LowerMemcpy/CMakeLists.txt
new file mode 100644
index 0000000000000..3b40e7fae8404
--- /dev/null
+++ b/llvm/lib/Transforms/LowerMemcpy/CMakeLists.txt
@@ -0,0 +1,20 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the LowerMemcpy plugin.
+if( NOT LLVM_REQUIRES_RTTI )
+  if( NOT LLVM_REQUIRES_EH )
+    set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/LowerMemcpy.exports)
+  endif()
+endif()
+
+if(WIN32 OR CYGWIN)
+  set(LLVM_LINK_COMPONENTS Core Support)
+endif()
+
+add_llvm_library( LLVMLowerMemcpy MODULE BUILDTREE_ONLY
+  LowerMemcpy.cpp
+
+  DEPENDS
+  intrinsics_gen
+  PLUGIN_TOOL
+  opt
+  )
diff --git a/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.cpp b/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.cpp
new file mode 100644
index 0000000000000..9f33e398ec5b6
--- /dev/null
+++ b/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.cpp
@@ -0,0 +1,250 @@
+//===- LowerMemcpy.cpp - ------------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Lower llvm.memcpy calls that copy whole structs into struct loads and
+// stores: one load/store pair for a single struct, a loop for an array of
+// structs. Only copies whose source and destination are both bitcasts of
+// pointers to the same struct type are rewritten; all others are left alone.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LowerMemcpy.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+#define DEBUG_TYPE "lower-memcpy"
+
+using namespace llvm;
+
+namespace {
+
+/// Function pass that replaces llvm.memcpy calls copying whole structs with
+/// struct loads and stores.
+struct LowerMemcpy : public FunctionPass {
+  static char ID;
+
+  LowerMemcpy() : FunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addPreserved<StackProtector>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override {
+    return "Lower struct memcpys into loops";
+  }
+
+private:
+  /// If \p addr is a bitcast instruction whose operand points to a struct,
+  /// store that operand in \p base and return the struct type. Returns
+  /// nullptr for any other address.
+  static StructType *lookThroughBitCast(Value *addr, Value *&base) {
+    auto *cast = dyn_cast<BitCastInst>(addr);
+    if (cast == nullptr)
+      return nullptr;
+    base = cast->getOperand(0);
+    return dyn_cast<StructType>(cast->getSrcTy()->getPointerElementType());
+  }
+
+  /// Tag the back-edge branch \p latch_br with an empty llvm.loop ID. A loop
+  /// ID is only valid when its first operand refers to the node itself.
+  static void attachLoopID(Instruction *latch_br) {
+    Metadata *ops[] = {nullptr};
+    MDNode *loop_id = MDNode::getDistinct(latch_br->getContext(), ops);
+    loop_id->replaceOperandWith(0, loop_id);
+    latch_br->setMetadata(LLVMContext::MD_loop, loop_id);
+  }
+
+  /// Split the block in front of \p I and insert a loop copying \p count
+  /// elements of \p copy_type from \p src_addr to \p dst_addr. \p count must
+  /// already be defined ahead of \p I. The loop body runs before its exit
+  /// test, so a count that may be zero needs \p may_be_zero, which adds a
+  /// guard branch around the loop.
+  static void emitCopyLoop(MemCpyInst &I, Value *src_addr, Value *dst_addr,
+                           StructType *copy_type, Value *count,
+                           bool may_be_zero, StringRef loop_name,
+                           StringRef split_name) {
+    BasicBlock *pre_loop_BB = I.getParent();
+    Function *parent_fn = pre_loop_BB->getParent();
+    Type *iter_type = count->getType();
+
+    BasicBlock *post_loop_BB = pre_loop_BB->splitBasicBlock(&I, split_name);
+    BasicBlock *loop_BB = BasicBlock::Create(parent_fn->getContext(), loop_name,
+                                             parent_fn, post_loop_BB);
+
+    // splitBasicBlock ended pre_loop_BB with a branch to post_loop_BB.
+    Instruction *fallthrough = pre_loop_BB->getTerminator();
+    if (may_be_zero) {
+      IRBuilder<> GuardBuilder(fallthrough);
+      Value *is_empty =
+          GuardBuilder.CreateICmpEQ(count, ConstantInt::get(iter_type, 0U));
+      GuardBuilder.CreateCondBr(is_empty, post_loop_BB, loop_BB);
+      fallthrough->eraseFromParent();
+    } else {
+      fallthrough->setSuccessor(0, loop_BB);
+    }
+
+    IRBuilder<> LoopBuilder(loop_BB);
+    // iter
+    PHINode *loop_index = LoopBuilder.CreatePHI(iter_type, 2, "loop-index");
+    loop_index->addIncoming(ConstantInt::get(iter_type, 0U), pre_loop_BB);
+    // assignment
+    Value *src_GEP =
+        LoopBuilder.CreateInBoundsGEP(copy_type, src_addr, loop_index);
+    Value *load = LoopBuilder.CreateLoad(copy_type, src_GEP);
+    Value *dst_GEP =
+        LoopBuilder.CreateInBoundsGEP(copy_type, dst_addr, loop_index);
+    LoopBuilder.CreateStore(load, dst_GEP);
+    // inc
+    Value *new_index =
+        LoopBuilder.CreateAdd(loop_index, ConstantInt::get(iter_type, 1U));
+    loop_index->addIncoming(new_index, loop_BB);
+    // br
+    attachLoopID(LoopBuilder.CreateCondBr(
+        LoopBuilder.CreateICmpULT(new_index, count), loop_BB, post_loop_BB));
+  }
+
+  /// Lower a copy of constant length \p copy_len. Returns true when \p I has
+  /// been made redundant and may be erased.
+  static bool createMemCpyLoopKnownSize(MemCpyInst &I, Value *src_addr,
+                                        Value *dst_addr, StructType *copy_type,
+                                        uint64_t struct_size,
+                                        ConstantInt *copy_len) {
+    uint64_t byte_len = copy_len->getZExtValue();
+    // A zero-length copy needs no replacement code.
+    if (byte_len == 0)
+      return true;
+    // Struct loads/stores cannot express a partial trailing struct; keep the
+    // original memcpy rather than silently dropping the remainder.
+    if (byte_len % struct_size != 0)
+      return false;
+
+    uint64_t loop_count = byte_len / struct_size;
+    if (loop_count == 1) {
+      IRBuilder<> InstBuilder(&I);
+      Value *load = InstBuilder.CreateLoad(copy_type, src_addr);
+      InstBuilder.CreateStore(load, dst_addr);
+      return true;
+    }
+
+    Constant *loop_ci = ConstantInt::get(copy_len->getType(), loop_count);
+    emitCopyLoop(I, src_addr, dst_addr, copy_type, loop_ci,
+                 /*may_be_zero=*/false, "load-store-loop", "memcpy_split");
+    return true;
+  }
+
+  /// Lower a copy whose length is only known at run time. Returns true when
+  /// \p I has been made redundant and may be erased.
+  /// NOTE(review): the trip count is length / struct size, so the lowering
+  /// assumes the length is a whole number of structs; trailing bytes are not
+  /// copied.
+  static bool createMemCpyLoopUnknownSize(MemCpyInst &I, Value *src_addr,
+                                          Value *dst_addr,
+                                          StructType *copy_type,
+                                          uint64_t struct_size) {
+    Value *copy_len = I.getLength();
+    IRBuilder<> PLBuilder(&I);
+    Value *run_time_loop_cnt = PLBuilder.CreateUDiv(
+        copy_len, ConstantInt::get(copy_len->getType(), struct_size));
+    emitCopyLoop(I, src_addr, dst_addr, copy_type, run_time_loop_cnt,
+                 /*may_be_zero=*/true, "load-memcpy-expansion",
+                 "post-loop-memcpy-expansion");
+    return true;
+  }
+
+  /// Try to replace \p I with struct loads/stores. Returns true when the
+  /// replacement was emitted and \p I may be erased, false when \p I must
+  /// stay.
+  static bool lowerMemCpy(MemCpyInst &I) {
+    // Plain loads and stores would lose the volatile semantics.
+    if (I.isVolatile())
+      return false;
+
+    Value *src_addr = nullptr;
+    Value *dst_addr = nullptr;
+    StructType *src_type = lookThroughBitCast(I.getRawSource(), src_addr);
+    StructType *dst_type = lookThroughBitCast(I.getRawDest(), dst_addr);
+    if (src_type == nullptr || src_type != dst_type)
+      return false;
+    // Opaque structs have no layout to query.
+    if (!src_type->isSized())
+      return false;
+
+    const DataLayout &dl = I.getModule()->getDataLayout();
+    // The emitted loads/stores use the struct's ABI alignment, so the memcpy
+    // must guarantee at least that much.
+    unsigned abi_align = dl.getABITypeAlignment(src_type);
+    if (I.getSourceAlignment() < abi_align || I.getDestAlignment() < abi_align)
+      return false;
+    // An empty struct would make the trip-count division divide by zero.
+    uint64_t struct_size = dl.getStructLayout(src_type)->getSizeInBytes();
+    if (struct_size == 0)
+      return false;
+
+    if (auto *CI = dyn_cast<ConstantInt>(I.getLength()))
+      return createMemCpyLoopKnownSize(I, src_addr, dst_addr, src_type,
+                                       struct_size, CI);
+    return createMemCpyLoopUnknownSize(I, src_addr, dst_addr, src_type,
+                                       struct_size);
+  }
+};
+
+char LowerMemcpy::ID = 0;
+static RegisterPass<LowerMemcpy> X("lower-memcpy", "lower memcpy to for loops");
+
+bool LowerMemcpy::runOnFunction(Function &F) {
+  // Collect the calls first: lowering splits blocks, which would disturb an
+  // in-flight walk over the function.
+  SmallVector<MemCpyInst *, 4> memcpy_calls;
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB)
+      if (auto *memcpy_call = dyn_cast<MemCpyInst>(&I))
+        memcpy_calls.push_back(memcpy_call);
+
+  // Erase a call only after its replacement has been emitted; calls that
+  // cannot be lowered stay in place.
+  bool changed = false;
+  for (MemCpyInst *memcpy_call : memcpy_calls) {
+    if (!lowerMemCpy(*memcpy_call))
+      continue;
+    memcpy_call->eraseFromParent();
+    changed = true;
+  }
+  return changed;
+}
+
+} // namespace
+
+namespace llvm {
+void initializeLowerMemcpyPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(LowerMemcpy, "lower-memcpy",
+                "Lower struct llvm.mem* intrinsics into loops",
+                false, false)
+
+FunctionPass *llvm::createLowerMemcpy() { return new LowerMemcpy(); }
diff --git a/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.exports b/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.exports
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.h b/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.h
new file mode 100644
index 0000000000000..0e9e2f4d32f1d
--- /dev/null
+++ b/llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.h
@@ -0,0 +1,23 @@
+//===-- llvm/lib/Transforms/LowerMemcpy/LowerMemcpy.h -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the pass that lowers struct memcpys
+// into load/store loops
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_LOWERMEMCPY_LOWERMEMCPY_H
+#define LLVM_LIB_TRANSFORMS_LOWERMEMCPY_LOWERMEMCPY_H
+
+namespace llvm {
+class FunctionPass;
+
+FunctionPass *createLowerMemcpy();
+}
+
+#endif