Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -226,7 +226,7 @@ void initializeMemorySSAWrapperPassPass(PassRegistry&); void initializeMemorySanitizerPass(PassRegistry&); void initializeMergeFunctionsPass(PassRegistry&); -void initializeMergedLoadStoreMotionPass(PassRegistry &); +void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry &); void initializeMetaRenamerPass(PassRegistry&); void initializeModuleDebugInfoPrinterPass(PassRegistry&); void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &); Index: include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h =================================================================== --- include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h +++ include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h @@ -0,0 +1,99 @@ +//===- MergedLoadStoreMotion.h - merge and hoist/sink load/stores ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//! \file +//! \brief This pass performs merges of loads and stores on both sides of a +// diamond (hammock). It hoists the loads and sinks the stores. +// +// The algorithm iteratively hoists two loads to the same address out of a +// diamond (hammock) and merges them into a single load in the header. +// Similarly, it sinks and merges two stores to the tail block (footer). The +// algorithm iterates over the instructions of one side of the diamond and +// attempts to find a matching load/store on the other side. It hoists / sinks +// when it thinks it is safe to do so. This optimization helps with e.g. hiding +// load latencies, triggering if-conversion, and reducing static code size. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H +#define LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" + +namespace llvm { + +class MergedLoadStoreMotion { + MemoryDependenceResults *MD = nullptr; + AliasAnalysis *AA = nullptr; + + // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, + // where Size0 and Size1 are the #instructions on the two sides of + // the diamond. The constant chosen here is arbitrary. Compiler Time + // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. + const int MagicCompileTimeControl = 250; + +public: + bool run(Function &F, MemoryDependenceResults *MD, AliasAnalysis &AA); + +private: + /// + /// \brief Remove instruction from parent and update memory dependence + /// analysis. 
+ /// + void removeInstruction(Instruction *Inst); + BasicBlock *getDiamondTail(BasicBlock *BB); + bool isDiamondHead(BasicBlock *BB); + // Routines for hoisting loads + bool isLoadHoistBarrierInRange(const Instruction &Start, + const Instruction &End, LoadInst *LI, + bool SafeToLoadUnconditionally); + LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); + void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, + Instruction *ElseInst); + bool isSafeToHoist(Instruction *I) const; + bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst); + bool mergeLoads(BasicBlock *BB); + // Routines for sinking stores + StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); + PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); + bool isStoreSinkBarrierInRange(const Instruction &Start, + const Instruction &End, MemoryLocation Loc); + bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); + bool mergeStores(BasicBlock *BB); +}; + +class MergedLoadStoreMotionPass + : public PassInfoMixin<MergedLoadStoreMotionPass> { +public: + PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); + +private: + // Implementation class, so that we can share code between the old + // and the new pass manager. 
+ MergedLoadStoreMotion Impl; +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_MERGEDLOADSTOREMOTION_H Index: lib/LTO/LTOCodeGenerator.cpp =================================================================== --- lib/LTO/LTOCodeGenerator.cpp +++ lib/LTO/LTOCodeGenerator.cpp @@ -123,7 +123,7 @@ initializeReversePostOrderFunctionAttrsLegacyPassPass(R); initializeGlobalsAAWrapperPassPass(R); initializeLICMPass(R); - initializeMergedLoadStoreMotionPass(R); + initializeMergedLoadStoreMotionLegacyPassPass(R); initializeGVNLegacyPassPass(R); initializeMemCpyOptLegacyPassPass(R); initializeDCELegacyPassPass(R); Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -83,14 +83,15 @@ #include "llvm/Transforms/Scalar/LowerAtomic.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Scalar/SLPVectorizer.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" -#include "llvm/Transforms/Scalar/SLPVectorizer.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MemorySSA.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -134,6 +134,7 @@ FUNCTION_PASS("gvn", GVN()) FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) +FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) 
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) Index: lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -72,6 +72,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" @@ -92,83 +93,6 @@ #define DEBUG_TYPE "mldst-motion" -//===----------------------------------------------------------------------===// -// MergedLoadStoreMotion Pass -//===----------------------------------------------------------------------===// - -namespace { -class MergedLoadStoreMotion : public FunctionPass { - AliasAnalysis *AA; - MemoryDependenceResults *MD; - -public: - static char ID; // Pass identification, replacement for typeid - MergedLoadStoreMotion() - : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) { - initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - -private: - // This transformation requires dominator postdominator info - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<MemoryDependenceWrapperPass>(); - } - - // Helper routines - - /// - /// \brief Remove instruction from parent and update memory dependence - /// analysis. 
- /// - void removeInstruction(Instruction *Inst); - BasicBlock *getDiamondTail(BasicBlock *BB); - bool isDiamondHead(BasicBlock *BB); - // Routines for hoisting loads - bool isLoadHoistBarrierInRange(const Instruction &Start, - const Instruction &End, LoadInst *LI, - bool SafeToLoadUnconditionally); - LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI); - void hoistInstruction(BasicBlock *BB, Instruction *HoistCand, - Instruction *ElseInst); - bool isSafeToHoist(Instruction *I) const; - bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst); - bool mergeLoads(BasicBlock *BB); - // Routines for sinking stores - StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); - PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); - bool isStoreSinkBarrierInRange(const Instruction &Start, - const Instruction &End, MemoryLocation Loc); - bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); - bool mergeStores(BasicBlock *BB); - // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, - // where Size0 and Size1 are the #instructions on the two sides of - // the diamond. The constant chosen here is arbitrary. Compiler Time - // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. - const int MagicCompileTimeControl; -}; - -char MergedLoadStoreMotion::ID = 0; -} // anonymous namespace - -/// -/// \brief createMergedLoadStoreMotionPass - The public interface to this file. -/// -FunctionPass *llvm::createMergedLoadStoreMotionPass() { - return new MergedLoadStoreMotion(); -} - -INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion", - "MergedLoadStoreMotion", false, false) -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion", - "MergedLoadStoreMotion", false, false) - /// /// \brief Remove instruction from parent and update memory dependence analysis. 
/// @@ -552,16 +476,10 @@ return MergedStores; } -/// -/// \brief Run the transformation for each function -/// -bool MergedLoadStoreMotion::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); - MD = MDWP ? &MDWP->getMemDep() : nullptr; - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); +bool MergedLoadStoreMotion::run(Function &F, MemoryDependenceResults *MD, + AliasAnalysis &AA) { + this->MD = MD; + this->AA = &AA; bool Changed = false; DEBUG(dbgs() << "Instruction Merger\n"); @@ -580,3 +498,66 @@ } return Changed; } + +namespace { +class MergedLoadStoreMotionLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + MergedLoadStoreMotionLegacyPass() : FunctionPass(ID) { + initializeMergedLoadStoreMotionLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + /// + /// \brief Run the transformation for each function + /// + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>(); + return Impl.run(F, MDWP ? &MDWP->getMemDep() : nullptr, + getAnalysis<AAResultsWrapperPass>().getAAResults()); + } + +private: + // This transformation requires dominator postdominator info + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<MemoryDependenceWrapperPass>(); + } + MergedLoadStoreMotion Impl; +}; + +char MergedLoadStoreMotionLegacyPass::ID = 0; +} // anonymous namespace + +/// +/// \brief createMergedLoadStoreMotionPass - The public interface to this file. 
+/// +FunctionPass *llvm::createMergedLoadStoreMotionPass() { + return new MergedLoadStoreMotionLegacyPass(); +} + +INITIALIZE_PASS_BEGIN(MergedLoadStoreMotionLegacyPass, "mldst-motion", + "MergedLoadStoreMotion", false, false) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion", + "MergedLoadStoreMotion", false, false) + +PreservedAnalyses +MergedLoadStoreMotionPass::run(Function &F, AnalysisManager<Function> &AM) { + auto *MD = AM.getCachedResult<MemoryDependenceAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + if (!Impl.run(F, MD, AA)) + return PreservedAnalyses::all(); + + // FIXME: This pass should also 'preserve the CFG'. + // The new pass manager has currently no way to do it. + PreservedAnalyses PA; + PA.preserve<GlobalsAA>(); + PA.preserve<MemoryDependenceAnalysis>(); + return PA; +} Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -65,7 +65,7 @@ initializeLowerExpectIntrinsicPass(Registry); initializeLowerGuardIntrinsicPass(Registry); initializeMemCpyOptLegacyPassPass(Registry); - initializeMergedLoadStoreMotionPass(Registry); + initializeMergedLoadStoreMotionLegacyPassPass(Registry); initializeNaryReassociatePass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); Index: test/Transforms/InstMerge/exceptions.ll =================================================================== --- test/Transforms/InstMerge/exceptions.ll +++ test/Transforms/InstMerge/exceptions.ll @@ -1,4 +1,7 @@ ; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<memdep>',mldst-motion \ +; RUN: -S < %s | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"