Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -133,6 +133,7 @@
 void initializeDataFlowSanitizerPass(PassRegistry&);
 void initializeScalarizerPass(PassRegistry&);
 void initializeEarlyCSELegacyPassPass(PassRegistry &);
+void initializeGVNHoistLegacyPassPass(PassRegistry &);
 void initializeEliminateAvailableExternallyPass(PassRegistry&);
 void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
Index: llvm/include/llvm/LinkAllPasses.h
===================================================================
--- llvm/include/llvm/LinkAllPasses.h
+++ llvm/include/llvm/LinkAllPasses.h
@@ -156,6 +156,7 @@
       (void) llvm::createConstantHoistingPass();
       (void) llvm::createCodeGenPreparePass();
       (void) llvm::createEarlyCSEPass();
+      (void) llvm::createGVNHoistPass();
       (void) llvm::createMergedLoadStoreMotionPass();
       (void) llvm::createGVNPass();
       (void) llvm::createMemCpyOptPass();
Index: llvm/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar.h
+++ llvm/include/llvm/Transforms/Scalar.h
@@ -328,6 +328,13 @@
 //===----------------------------------------------------------------------===//
 //
+// GVNHoist - This pass performs a simple and fast GVN pass over the dominator
+// tree to hoist common expressions from sibling branches.
+//
+FunctionPass *createGVNHoistPass();
+
+//===----------------------------------------------------------------------===//
+//
 // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
 // are hoisted into the header, while stores sink into the footer.
 //
Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -58,11 +58,7 @@
   AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
   MemoryDependenceResults &getMemDep() const { return *MD; }
 
-private:
-  friend class gvn::GVNLegacyPass;
-  struct Expression;
-  friend struct DenseMapInfo<Expression>;
 
   /// This class holds the mapping between values and value numbers. It is used
   /// as an efficient mechanism to determine the expression-wise equivalence of
@@ -105,6 +101,10 @@
     void verifyRemoved(const Value *) const;
   };
 
+private:
+  friend class gvn::GVNLegacyPass;
+  friend struct DenseMapInfo<Expression>;
+
   MemoryDependenceResults *MD;
   DominatorTree *DT;
   const TargetLibraryInfo *TLI;
@@ -229,6 +229,13 @@
 /// loads are eliminated by the pass.
 FunctionPass *createGVNPass(bool NoLoads = false);
 
+/// \brief A simple and fast domtree-based GVN pass to hoist common expressions
+/// from sibling branches.
+struct GVNHoistPass : PassInfoMixin<GVNHoistPass> {
+  /// \brief Run the pass over the function.
+  PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+};
+
 }
 
 #endif
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -95,6 +95,7 @@
 FUNCTION_PASS("aa-eval", AAEvaluator())
 FUNCTION_PASS("adce", ADCEPass())
 FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("gvn-hoist", GVNHoistPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass())
 FUNCTION_PASS("no-op-function", NoOpFunctionPass())
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -210,6 +210,7 @@
   else
     FPM.add(createScalarReplAggregatesPass());
   FPM.add(createEarlyCSEPass());
+  FPM.add(createGVNHoistPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -11,6 +11,7 @@
   FlattenCFGPass.cpp
   Float2Int.cpp
   GVN.cpp
+  GVNHoist.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -0,0 +1,485 @@
+//===- GVNHoist.cpp - Hoist scalar and load expressions ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists expressions from branches to a common dominator. This pass
+// uses GVN (global value numbering) to discover expressions computing the same
+// values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <map>
+#include <set>
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-hoist"
+
+STATISTIC(NumHoisted, "Number of hoisted instructions");
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+static cl::opt<int> HoistedScalarsThreshold(
+    "hoisted-scalars-threshold", cl::Hidden, cl::init(-1),
+    cl::desc("Max number of scalar instructions to hoist "
+             "(default unlimited = -1)"));
+static cl::opt<int>
+    HoistedLoadsThreshold("hoisted-loads-threshold", cl::Hidden, cl::init(-1),
+                          cl::desc("Max number of loads to hoist "
+                                   "(default unlimited = -1)"));
+static int HoistedScalarsCtr = 0;
+static int HoistedLoadsCtr = 0;
+
+namespace {
+// This pass hoists common computations across branches sharing a
+// common immediate dominator. The primary goal is to reduce the code size,
+// and in some cases reduce critical path (by exposing more ILP).
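+//
+// As a small illustration (mirroring the scalarsHoisting test added with this
+// patch): when both arms of a branch compute the same expression,
+//
+//   if (c) { x = a - b; ... } else { y = a - b; ... }
+//
+// the two subtractions get the same value number, so one of them is moved
+// into the common dominator of the two arms and the other is removed, with
+// its uses rewritten to the hoisted instruction.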
+class GVNHoistLegacyPassImpl {
+public:
+  GVN::ValueTable VN;
+  DominatorTree *DT;
+  AliasAnalysis *AA;
+  MemoryDependenceResults *MD;
+  LoopInfo *LI;
+  static char ID;
+
+  GVNHoistLegacyPassImpl(DominatorTree *Dt, AliasAnalysis *Aa,
+                         MemoryDependenceResults *Md, LoopInfo *Li)
+      : DT(Dt), AA(Aa), MD(Md), LI(Li) {}
+
+  // Return true when there are side effects on any path from InsertBB to Load.
+  template <typename Fun>
+  bool hasSideEffects(BasicBlock *InsertBB, std::set<BasicBlock *> &Paths,
+                      std::vector<Instruction *> &InstructionsToHoist,
+                      std::set<BasicBlock *> &HasSideEffects,
+                      std::set<BasicBlock *> &NoSideEffects,
+                      Fun checkForSideEffects) {
+    for (Instruction *Insn : InstructionsToHoist) {
+      BasicBlock *BB = Insn->getParent();
+
+      // We may already know that there are no side effects in the whole BB.
+      if (NoSideEffects.find(BB) != NoSideEffects.end())
+        continue;
+
+      if (BB == InsertBB) {
+        // Check for side effects from the load to the end of the block. As the
+        // load is already in InsertBB, the code generation will leave it at
+        // the current place, so we need to make sure that there are no side
+        // effects from the load to the end of the block.
+        for (BasicBlock::iterator I(Insn); I != InsertBB->end(); ++I)
+          if (checkForSideEffects(&*I))
+            return true;
+        continue;
+      }
+
+      // Check for side effects from the beginning of BB to Insn.
+      for (Instruction &I1 : *BB) {
+        // Do not check for side effects after Insn.
+        if (&I1 == Insn)
+          break;
+        if (checkForSideEffects(&I1))
+          return true;
+      }
+    }
+
+    // Check for side effects in all instructions on all the paths.
+    for (BasicBlock *BB : Paths) {
+      // Check in the cache first.
+      if (HasSideEffects.find(BB) != HasSideEffects.end())
+        return true;
+      if (NoSideEffects.find(BB) != NoSideEffects.end())
+        continue;
+
+      for (Instruction &I1 : *BB)
+        if (checkForSideEffects(&I1)) {
+          // Record that BB has side effects.
+          HasSideEffects.insert(BB);
+          return true;
+        }
+
+      // Record that BB does not have side effects.
+      NoSideEffects.insert(BB);
+    }
+
+    return false;
+  }
+
+  // Return true when there are exception handling blocks on the execution path
+  // or at hoisting sites.
+  bool hasEH(std::set<BasicBlock *> &Path,
+             std::vector<Instruction *> &InstructionsToHoist) {
+    for (BasicBlock *BB : Path)
+      if (BB->isEHPad() || BB->hasAddressTaken())
+        return true;
+
+    for (Instruction *I : InstructionsToHoist) {
+      BasicBlock *BB = I->getParent();
+      if (BB->isEHPad() || BB->hasAddressTaken())
+        return true;
+    }
+
+    return false;
+  }
+
+  // Return true when all paths from InsertBB to the end of the function
+  // contain one of the instructions in Insns. Return false when there exists
+  // a path from InsertBB to the end of the function that does not pass through
+  // one of the Insns.
+  bool hoistingFromAllPaths(BasicBlock *InsertBB,
+                            std::vector<Instruction *> &Insns) {
+    // Create a work list of all the BBs of the Insns to be hoisted.
+    std::set<BasicBlock *> WL;
+    for (Instruction *I : Insns) {
+      BasicBlock *BB = I->getParent();
+
+      // When InsertBB already contains an instruction to be hoisted, the
+      // expression is needed on all paths.
+      if (BB == InsertBB)
+        return true;
+
+      WL.insert(BB);
+    }
+
+    for (auto It = df_begin(InsertBB), E = df_end(InsertBB); It != E;) {
+      // There exists a path from InsertBB to the exit of the function if we
+      // are still iterating in DF traversal and we removed all instructions
+      // from the work list.
+      if (WL.empty())
+        return false;
+
+      BasicBlock *BB = *It;
+      if (WL.erase(BB)) {
+        // Stop the DFS traversal when BB is in the work list.
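+        // Every path that goes through BB already passes through one of the
+        // instructions to be hoisted, so the subtree rooted at BB does not
+        // need to be explored.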
+        It.skipChildren();
+        continue;
+      }
+
+      // Check for end of function, calls that do not return, etc.
+      if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
+        return false;
+
+      // Increment the DFS traversal when not skipping children.
+      ++It;
+    }
+
+    return true;
+  }
+
+  // A multimap from a VN (value number) to all the instructions with that VN.
+  typedef std::multimap<unsigned, Instruction *> VNtoInsns;
+
+  // Each element of a hoisting list contains the basic block where to hoist
+  // and a list of instructions to be hoisted.
+  typedef std::vector<std::pair<BasicBlock *, std::vector<Instruction *>>>
+      HoistingPointList;
+
+  void computeInsertionPoints(VNtoInsns &Map, HoistingPointList &HPL,
+                              bool IsLoad) {
+    std::set<BasicBlock *> HasSideEffects, NoSideEffects;
+    std::set<BasicBlock *> HasCalls, NoCalls;
+    for (auto It = Map.begin(); It != Map.end();
+         It = Map.upper_bound(It->first)) {
+      unsigned V = It->first;
+      if (Map.count(V) < 2)
+        continue;
+
+      if (IsLoad) {
+        HoistedLoadsCtr++;
+        if (HoistedLoadsThreshold != -1 &&
+            HoistedLoadsCtr > HoistedLoadsThreshold)
+          continue;
+      } else {
+        HoistedScalarsCtr++;
+        if (HoistedScalarsThreshold != -1 &&
+            HoistedScalarsCtr > HoistedScalarsThreshold)
+          continue;
+      }
+
+      // Compute the insertion point and the list of expressions to be hoisted.
+      auto R = Map.equal_range(V);
+      auto First = R.first;
+      auto Last = R.second;
+      std::vector<Instruction *> InstructionsToHoist;
+      Instruction *I = First->second;
+      InstructionsToHoist.push_back(I);
+      BasicBlock *InsertBB = I->getParent();
+      ++First;
+      for (; First != Last; ++First) {
+        I = First->second;
+        InstructionsToHoist.push_back(I);
+        InsertBB = DT->findNearestCommonDominator(InsertBB, I->getParent());
+      }
+
+      // Check that the hoisted expression is needed on all paths: it is unsafe
+      // to hoist loads to a place where there may be a path not loading from
+      // the same address: for instance there may be a branch on which the
+      // address of the load may not be initialized. FIXME: at -Oz we may want
+      // to hoist scalars to a place where they are partially needed.
+      if (!hoistingFromAllPaths(InsertBB, InstructionsToHoist))
+        continue;
+
+      // Check for unsafe hoistings due to side effects.
+
+      // Collect all blocks on all execution Paths from InsertBB to the
+      // instructions to be hoisted.
+      std::set<BasicBlock *> Paths;
+      for (Instruction *I : InstructionsToHoist) {
+        BasicBlock *BBI = I->getParent();
+
+        // We may need to keep BBI in the Paths set if we have already added it
+        // to Paths for another expression.
+        bool Keep = false;
+        if (Paths.count(BBI))
+          Keep = true;
+
+        // Record in Paths all basic blocks reachable in depth-first iteration
+        // on the inverse CFG from BBI to InsertBB. These blocks are all the
+        // blocks that may be executed between the execution of InsertBB and
+        // BBI. Hoisting an expression from BBI into InsertBB has to be safe on
+        // all execution paths.
+        for (auto It = idf_ext_begin(BBI, Paths), E = idf_ext_end(BBI, Paths);
+             It != E;)
+          if (*It == InsertBB)
+            // Stop traversal when reaching InsertBB.
+            It.skipChildren();
+          else
+            ++It;
+
+        // When hoisting an expression out of a loop, we need to check the
+        // safety of all instructions of all surrounding loops.
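+        // For instance, a store anywhere in the loop body may be executed
+        // between the insertion point and a load we would like to hoist out
+        // of the loop (see the noHoistInLoopsWithStores test), so all blocks
+        // of the enclosing loops up to the loop of InsertBB are added to
+        // Paths for the side-effect checks below.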
+        if (Loop *LB = LI->getLoopFor(BBI)) {
+          Keep = true;
+          Loop *L = LI->getLoopFor(InsertBB);
+          while (LB->getParentLoop() && LB->getParentLoop() != L)
+            LB = LB->getParentLoop();
+
+          for (BasicBlock *BB : LB->blocks())
+            Paths.insert(BB);
+
+          // FIXME: LoopInfo is not enough to check: for the moment, loop info
+          // does not tag BBs in irreducible loops, and so we would not be able
+          // to discard hoistings in irreducible SCCs. We need LoopInfo to
+          // provide the set of all BBs belonging to irreducible SCCs.
+
+          // FIXME: We may want to refine the check with some minimal
+          // dependence analysis or memory SSA in order to allow more
+          // instructions to be hoisted from loops.
+        }
+
+        // Safety check for BBI will be handled separately.
+        if (!Keep)
+          Paths.erase(BBI);
+      }
+
+      // Safety check for InsertBB will be handled separately.
+      Paths.erase(InsertBB);
+
+      // It is unsafe to hoist scalar or load expressions past regions of code
+      // with exception handling and calls.
+      if (hasEH(Paths, InstructionsToHoist) ||
+          hasSideEffects(
+              InsertBB, Paths, InstructionsToHoist, HasCalls, NoCalls,
+              [](Instruction *I) -> bool { return isa<CallInst>(I); }))
+        continue;
+
+      if (!IsLoad) {
+        HPL.push_back(std::make_pair(InsertBB, InstructionsToHoist));
+        continue;
+      }
+
+      // It is also unsafe to hoist loads past stores. FIXME: this restriction
+      // may be lifted by adding a data dependence check or memory SSA.
+      if (!hasSideEffects(InsertBB, Paths, InstructionsToHoist, HasSideEffects,
+                          NoSideEffects, [](Instruction *I) -> bool {
+                            return I->mayHaveSideEffects();
+                          }))
+        HPL.push_back(std::make_pair(InsertBB, InstructionsToHoist));
+    }
+  }
+
+  // Return true when all operands of I are available at insertion point
+  // InsertBB. When limiting the number of hoisted expressions, one could hoist
+  // a load without hoisting its access function. So before hoisting any
+  // expression, make sure that all its operands are available at the insert
+  // point.
+  bool allOperandsAvailable(Instruction *I, BasicBlock *InsertBB) {
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      Value *Op = I->getOperand(i);
+      Instruction *Inst = dyn_cast<Instruction>(Op);
+      if (!Inst)
+        continue;
+
+      if (!DT->dominates(Inst->getParent(), InsertBB))
+        return false;
+    }
+
+    return true;
+  }
+
+  bool hoist(HoistingPointList &HPL) {
+    bool Res = false;
+    for (auto HP : HPL) {
+      // Find out whether we already have one of the instructions in InsertBB,
+      // in which case we do not have to move it.
+      BasicBlock *InsertBB = HP.first;
+      std::vector<Instruction *> InstructionsToHoist = HP.second;
+      Instruction *Repl = nullptr;
+      for (auto &I : InstructionsToHoist)
+        if (I->getParent() == InsertBB) {
+          if (!Repl) {
+            Repl = I;
+          } else {
+            // There are two instructions in InsertBB to be hoisted in place:
+            // update Repl to be the first one, such that we can rename the
+            // uses of the second based on the first.
+            for (Instruction &I1 : *InsertBB)
+              if (&I1 == Repl) {
+                // Repl was already the first one.
+                break;
+              } else if (&I1 == I) {
+                Repl = I;
+                break;
+              }
+          }
+        }
+
+      if (Repl) {
+        // Repl is already in InsertBB: it remains in place.
+        assert(allOperandsAvailable(Repl, InsertBB) &&
+               "instruction depends on operands that are not available");
+      } else {
+        // When we do not find Repl in InsertBB, select the first in the list
+        // and move it to InsertBB.
+        Repl = InstructionsToHoist.front();
+
+        // We can move Repl in InsertBB only when all operands are available.
+        // The order in which hoistings are done may influence the availability
+        // of operands: for example, a load may not be hoisted until the gep
+        // computing the address of the load is hoisted.
+        if (!allOperandsAvailable(Repl, InsertBB))
+          continue;
+
+        Repl->moveBefore(InsertBB->getTerminator());
+      }
+
+      Res = true;
+      NumHoisted++;
+
+      // Remove and rename all other instructions.
+      for (Instruction *I : InstructionsToHoist)
+        if (I != Repl) {
+          NumRemoved++;
+          I->replaceAllUsesWith(Repl);
+          I->eraseFromParent();
+        }
+    }
+
+    return Res;
+  }
+
+  // Hoist all expressions.
+  bool hoistExpressions(Function &F) {
+    // Record all scalar expressions with the same VN in VNtoScalars, and all
+    // loads with the same VN in VNtoLoads.
+    VNtoInsns VNtoScalars;
+    VNtoInsns VNtoLoads;
+    for (BasicBlock &BB : F) {
+      for (Instruction &I1 : BB) {
+        LoadInst *Load = dyn_cast<LoadInst>(&I1);
+        if (!Load) {
+          unsigned V = VN.lookup_or_add(&I1);
+          VNtoScalars.insert(std::make_pair(V, &I1));
+          continue;
+        }
+
+        if (!Load->isSimple())
+          continue;
+
+        Value *Ptr = Load->getPointerOperand();
+        unsigned V = VN.lookup_or_add(Ptr);
+        VNtoLoads.insert(std::make_pair(V, Load));
+      }
+    }
+
+    HoistingPointList HPL;
+    computeInsertionPoints(VNtoScalars, HPL, false);
+    computeInsertionPoints(VNtoLoads, HPL, true);
+    return hoist(HPL);
+  }
+
+  bool run(Function &F) {
+    VN.setDomTree(DT);
+    VN.setAliasAnalysis(AA);
+    VN.setMemDep(MD);
+    bool Res = hoistExpressions(F);
+
+    // To address a limitation of the current GVN, we need to rerun the
+    // hoisting after we hoisted loads in order to be able to hoist all scalars
+    // dependent on the hoisted loads.
+    VN.clear();
+    Res |= hoistExpressions(F);
+    return Res;
+  }
+};
+
+class GVNHoistLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  GVNHoistLegacyPass() : FunctionPass(ID) {
+    initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipOptnoneFunction(F))
+      return false;
+
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+    GVNHoistLegacyPassImpl G(&DT, &AA, &MD, &LI);
+    return G.run(F);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemoryDependenceWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+};
+} // namespace
+
+PreservedAnalyses GVNHoistPass::run(Function &F,
+                                    AnalysisManager<Function> &AM) {
+  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  AliasAnalysis &AA = AM.getResult<AAManager>(F);
+  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+
+  GVNHoistLegacyPassImpl G(&DT, &AA, &MD, &LI);
+  if (!G.run(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  return PA;
+}
+
+char GVNHoistLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
+                      "Early GVN Hoisting of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
+                    "Early GVN Hoisting of Expressions", false, false)
+
+FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
Index: llvm/lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/Scalar.cpp
+++ llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -43,6 +43,7 @@
   initializeDSEPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeEarlyCSELegacyPassPass(Registry);
+  initializeGVNHoistLegacyPassPass(Registry);
   initializeFlattenCFGPassPass(Registry);
   initializeInductiveRangeCheckEliminationPass(Registry);
   initializeIndVarSimplifyPass(Registry);
@@ -236,6 +237,10 @@
   unwrap(PM)->add(createEarlyCSEPass());
 }
 
+void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createGVNHoistPass());
+}
+
 void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createTypeBasedAAWrapperPass());
 }
Index: llvm/test/Transforms/GVN/hoist.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/hoist.ll
@@ -0,0 +1,604 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GlobalVar = internal global float 1.000000e+00
+
+; Check that all scalar expressions are hoisted.
+;
+; CHECK-LABEL: @scalarsHoisting
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @scalarsHoisting(float %d, float %min, float %max, float %a) {
+entry:
+  %div = fdiv float 1.000000e+00, %d
+  %cmp = fcmp oge float %div, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %sub = fsub float %min, %a
+  %mul = fmul float %sub, %div
+  %sub1 = fsub float %max, %a
+  %mul2 = fmul float %sub1, %div
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %sub3 = fsub float %max, %a
+  %mul4 = fmul float %sub3, %div
+  %sub5 = fsub float %min, %a
+  %mul6 = fmul float %sub5, %div
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+  %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+  %add = fadd float %tmax.0, %tmin.0
+  ret float %add
+}
+
+; Check that all loads and scalars depending on the loads are hoisted.
+; Check that getelementptr computation gets hoisted before the load.
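+; The getelementptr is a plain scalar to the pass, so it is hoisted like any
+; other expression; once it has been moved, the address of the load is
+; available at the insertion point and the load itself becomes hoistable.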
+; +; CHECK-LABEL: @readsAndScalarsHoisting +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %A = getelementptr float, float* %min, i32 1 + %0 = load float, float* %A, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %B = getelementptr float, float* %min, i32 1 + %5 = load float, float* %B, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads after a store: the first two loads will be +; hoisted, and then the third load will not be hoisted. +; +; CHECK-LABEL: @readsAndWrites +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: store +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + store float %0, float* @GlobalVar + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do hoist loads when the store is above the insertion point. 
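+; The store is in the entry block, above the point where the hoisted loads are
+; inserted (before the terminator of entry), so no load is moved across it.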
+; +; CHECK-LABEL: @readsAndWriteAboveInsertPt +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + store float 0.000000e+00, float* @GlobalVar + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that dependent expressions are hoisted. +; CHECK-LABEL: @dependentScalarsHoisting +; CHECK: fsub +; CHECK: fadd +; CHECK: fdiv +; CHECK: fmul +; CHECK-NOT: fsub +; CHECK-NOT: fadd +; CHECK-NOT: fdiv +; CHECK-NOT: fmul +define float @dependentScalarsHoisting(float %a, float %b, i1 %c) { +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %d = fsub float %b, %a + %e = fadd float %d, %a + %f = fdiv float %e, %a + %g = fmul float %f, %a + br label %if.end + +if.else: + %h = fsub float %b, %a + %i = fadd float %h, %a + %j = fdiv float %i, %a + %k = fmul float %j, %a + br label %if.end + +if.end: + %r = phi float [ %g, %if.then ], [ %k, %if.else ] + ret float %r +} + +; Check that all independent expressions are hoisted. +; CHECK-LABEL: @independentScalarsHoisting +; CHECK: fadd +; CHECK: fsub +; CHECK: fdiv +; CHECK: fmul +; CHECK-NOT: fsub +; CHECK-NOT: fdiv +; CHECK-NOT: fmul +define float @independentScalarsHoisting(float %a, float %b, i1 %c) { +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + %d = fadd float %b, %a + %e = fsub float %b, %a + %f = fdiv float %b, %a + %g = fmul float %b, %a + br label %if.end + +if.else: + %i = fadd float %b, %a + %h = fsub float %b, %a + %j = fdiv float %b, %a + %k = fmul float %b, %a + br label %if.end + +if.end: + %p = phi float [ %d, %if.then ], [ %i, %if.else ] + %q = phi float [ %e, %if.then ], [ %h, %if.else ] + %r = phi float [ %f, %if.then ], [ %j, %if.else ] + %s = phi float [ %g, %if.then ], [ %k, %if.else ] + %t = fadd float %p, %q + %u = fadd float %r, %s + %v = fadd float %t, %u + ret float %v +} + +; Check that we hoist load and scalar expressions in triangles. 
+; CHECK-LABEL: @triangleHoisting +; CHECK: load +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.end: ; preds = %entry + %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ] + %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ] + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + + %x = fadd float %p1, %mul6 + %y = fadd float %p2, %mul4 + %z = fadd float %x, %y + ret float %z +} + +; Check that we hoist load and scalar expressions in dominator. +; CHECK-LABEL: @dominatorHoisting +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %entry + %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ] + %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ] + + %x = fadd float %p1, %mul2 + %y = fadd float %p2, %mul + %z = fadd float %x, %y + ret float %z +} + +; Check that we hoist load and scalar expressions in dominator. 
+; CHECK-LABEL: @domHoisting +; CHECK: load +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK: load +; CHECK: fsub +; CHECK: fmul +; CHECK-NOT: load +; CHECK-NOT: fmul +; CHECK-NOT: fsub +define float @domHoisting(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.else: + %6 = load float, float* %max, align 4 + %7 = load float, float* %a, align 4 + %sub9 = fsub float %6, %7 + %mul10 = fmul float %sub9, %div + %8 = load float, float* %min, align 4 + %sub12 = fsub float %8, %7 + %mul13 = fmul float %sub12, %div + br label %if.end + +if.end: + %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ] + %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ] + + %x = fadd float %p1, %mul2 + %y = fadd float %p2, %mul + %z = fadd float %x, %y + ret float %z +} + +; Check that we hoist a load at the right place within a same basic block. +; CHECK-LABEL: @hoistInSingleBB +; CHECK: load +; CHECK-NOT: load +define i32 @hoistInSingleBB() { +entry: + %D = alloca i32, align 4 + %0 = bitcast i32* %D to i8* + %bf = load i8, i8* %0, align 4 + %bf.clear = and i8 %bf, -3 + %bf1 = load i8, i8* %0, align 4 + %bf.clear1 = and i8 %bf1, 1 + ret i32 0 +} + +; Check that we do not hoist loads past stores within a same basic block. +; CHECK-LABEL: @noHoistInSingleBBWithStore +; CHECK: load +; CHECK: store +; CHECK: load +; CHECK: store +define i32 @noHoistInSingleBBWithStore() { +entry: + %D = alloca i32, align 4 + %0 = bitcast i32* %D to i8* + %bf = load i8, i8* %0, align 4 + %bf.clear = and i8 %bf, -3 + store i8 %bf.clear, i8* %0, align 4 + %bf1 = load i8, i8* %0, align 4 + %bf.clear1 = and i8 %bf1, 1 + store i8 %bf.clear1, i8* %0, align 4 + ret i32 0 +} + +; Check that we do not hoist loads past calls within a same basic block. +; CHECK-LABEL: @noHoistInSingleBBWithCall +; CHECK: load +; CHECK: call +; CHECK: load +declare void @foo() +define i32 @noHoistInSingleBBWithCall() { +entry: + %D = alloca i32, align 4 + %0 = bitcast i32* %D to i8* + %bf = load i8, i8* %0, align 4 + %bf.clear = and i8 %bf, -3 + call void @foo() + %bf1 = load i8, i8* %0, align 4 + %bf.clear1 = and i8 %bf1, 1 + ret i32 0 +} + +; Check that we do not hoist loads past stores in any branch of a diamond. 
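+; The store in one arm of the diamond is a side effect on a path between the
+; insertion point (entry) and the loads, so the pass conservatively refuses to
+; hoist any of the redundant loads.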
+; CHECK-LABEL: @noHoistInDiamondWithOneStore1 +; CHECK: fdiv +; CHECK: fcmp +; CHECK: br +define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store float 0.000000e+00, float* @GlobalVar + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + ; There are no side effects on the if.else branch. + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + + %6 = load float, float* %max, align 4 + %7 = load float, float* %a, align 4 + %sub6 = fsub float %6, %7 + %mul7 = fmul float %sub6, %div + %8 = load float, float* %min, align 4 + %sub8 = fsub float %8, %7 + %mul9 = fmul float %sub8, %div + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads past a store in any branch of a diamond. +; CHECK-LABEL: @noHoistInDiamondWithOneStore2 +; CHECK: fdiv +; CHECK: fcmp +; CHECK: br +define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + ; There are no side effects on the if.then branch. + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %if.end + +if.else: ; preds = %entry + store float 0.000000e+00, float* @GlobalVar + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: ; preds = %if.else, %if.then + %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ] + + %6 = load float, float* %max, align 4 + %7 = load float, float* %a, align 4 + %sub6 = fsub float %6, %7 + %mul7 = fmul float %sub6, %div + %8 = load float, float* %min, align 4 + %sub8 = fsub float %8, %7 + %mul9 = fmul float %sub8, %div + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} + +; Check that we do not hoist loads outside a loop containing stores. +; CHECK-LABEL: @noHoistInLoopsWithStores +; CHECK: fdiv +; CHECK: fcmp +; CHECK: br +define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) { +entry: + %div = fdiv float 1.000000e+00, %d + %cmp = fcmp oge float %div, 0.000000e+00 + br i1 %cmp, label %do.body, label %if.else + +do.body: + %0 = load float, float* %min, align 4 + %1 = load float, float* %a, align 4 + + ; It is unsafe to hoist the loads outside the loop because of the store. 
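+  ; The pass does not use dependence analysis, so any store inside the loop
+  ; conservatively blocks hoisting the loads out of the loop.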
+ store float 0.000000e+00, float* @GlobalVar + + %sub = fsub float %0, %1 + %mul = fmul float %sub, %div + %2 = load float, float* %max, align 4 + %sub1 = fsub float %2, %1 + %mul2 = fmul float %sub1, %div + br label %while.cond + +while.cond: + %cmp1 = fcmp oge float %mul2, 0.000000e+00 + br i1 %cmp1, label %if.end, label %do.body + +if.else: + %3 = load float, float* %max, align 4 + %4 = load float, float* %a, align 4 + %sub3 = fsub float %3, %4 + %mul4 = fmul float %sub3, %div + %5 = load float, float* %min, align 4 + %sub5 = fsub float %5, %4 + %mul6 = fmul float %sub5, %div + br label %if.end + +if.end: + %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ] + %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ] + + %add = fadd float %tmax.0, %tmin.0 + ret float %add +} +
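+
+; A sketch of an additional check in the same style as the tests above: loads
+; from two different pointers get different value numbers, so neither load is
+; expected to be hoisted and each branch keeps its own load.
+; CHECK-LABEL: @noHoistDifferentAddresses
+; CHECK: if.then:
+; CHECK: load
+; CHECK: if.else:
+; CHECK: load
+define float @noHoistDifferentAddresses(i1 %c, float* %p, float* %q) {
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:
+  %0 = load float, float* %p, align 4
+  br label %if.end
+
+if.else:
+  %1 = load float, float* %q, align 4
+  br label %if.end
+
+if.end:
+  %r = phi float [ %0, %if.then ], [ %1, %if.else ]
+  ret float %r
+}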