Index: include/llvm/Transforms/Scalar/ConstantHoisting.h =================================================================== --- include/llvm/Transforms/Scalar/ConstantHoisting.h +++ include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -36,6 +36,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" @@ -98,7 +99,7 @@ // Glue for old PM. bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - BasicBlock &Entry); + BlockFrequencyInfo *BFI, BasicBlock &Entry); void releaseMemory() { ConstantVec.clear(); @@ -112,6 +113,7 @@ const TargetTransformInfo *TTI; DominatorTree *DT; + BlockFrequencyInfo *BFI; BasicBlock *Entry; /// Keeps track of constant candidates found in the function. Index: lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- lib/Transforms/Scalar/ConstantHoisting.cpp +++ lib/Transforms/Scalar/ConstantHoisting.cpp @@ -53,6 +53,12 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); +static cl::opt ConstHoistWithBlockFrequency( + "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to reduce the " + "chance to execute const materialization more frequently than " + "without hoisting.")); + namespace { /// \brief The constant hoisting pass. class ConstantHoistingLegacyPass : public FunctionPass { @@ -68,6 +74,8 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (ConstHoistWithBlockFrequency) + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -82,6 +90,7 @@ char ConstantHoistingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", @@ -99,9 +108,13 @@ DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - bool MadeChange = Impl.runImpl( - Fn, getAnalysis().getTTI(Fn), - getAnalysis().getDomTree(), Fn.getEntryBlock()); + bool MadeChange = + Impl.runImpl(Fn, getAnalysis().getTTI(Fn), + getAnalysis().getDomTree(), + ConstHoistWithBlockFrequency + ? &getAnalysis().getBFI() + : nullptr, + Fn.getEntryBlock()); if (MadeChange) { DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -150,8 +163,23 @@ for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); + BlockFrequency TotalFreq; + if (BFI) { + for (auto BB : BBs) + TotalFreq += BFI->getBlockFreq(BB); + } + + // If BFI is valid, only return insert point when the freq of the BB to + // insert const materialization is less than the total freq of each const + // uses, otherwise return nullptr. + auto checkFreq = [&](Instruction *Ins) { + if (!BFI) + return Ins; + return (TotalFreq >= BFI->getBlockFreq(Ins->getParent())) ? Ins : nullptr; + }; + if (BBs.count(Entry)) - return &Entry->front(); + return checkFreq(&Entry->front()); while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; @@ -159,14 +187,14 @@ BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); if (BB == Entry) - return &Entry->front(); + return checkFreq(&Entry->front()); BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return findMatInsertPt(&FirstInst); + return checkFreq(findMatInsertPt(&FirstInst)); } @@ -550,6 +578,8 @@ for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. Instruction *IP = findConstantInsertionPoint(ConstInfo); + if (!IP) + continue; IntegerType *Ty = ConstInfo.BaseConstant->getType(); Instruction *Base = new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); @@ -587,9 +617,11 @@ /// \brief Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, - DominatorTree &DT, BasicBlock &Entry) { + DominatorTree &DT, BlockFrequencyInfo *BFI, + BasicBlock &Entry) { this->TTI = &TTI; this->DT = &DT; + this->BFI = BFI; this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -620,7 +652,10 @@ FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); - if (!runImpl(F, TTI, DT, F.getEntryBlock())) + auto BFI = ConstHoistWithBlockFrequency + ? &AM.getResult(F) + : nullptr; + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) return PreservedAnalyses::all(); // FIXME: This should also 'preserve the CFG'. Index: test/CodeGen/X86/constant-hoisting-bfi.ll =================================================================== --- test/CodeGen/X86/constant-hoisting-bfi.ll +++ test/CodeGen/X86/constant-hoisting-bfi.ll @@ -0,0 +1,50 @@ +; RUN: llc -O2 -mtriple=x86_64-unknown-linux-gnu -consthoist-with-block-frequency=true < %s | FileCheck %s +; Check when BFI is enabled for constant hoisting, constant 214748364701 +; will not be hoisted to the func entry. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: foo: +; CHECK: # BB#0: # %entry +; CHECK-NOT: movabsq $214748364701, +; CHECK: # BB#1: # %if.then + +; Function Attrs: norecurse nounwind uwtable +define i64 @foo(i64* nocapture %a) local_unnamed_addr #0 { +entry: + %arrayidx = getelementptr inbounds i64, i64* %a, i64 9 + %0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp slt i64 %0, 564 + br i1 %cmp, label %if.then, label %if.else5 + +if.then: ; preds = %entry + %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5 + %1 = load i64, i64* %arrayidx1, align 8 + %cmp2 = icmp slt i64 %1, 1009 + br i1 %cmp2, label %if.then3, label %return + +if.then3: ; preds = %if.then + %arrayidx4 = getelementptr inbounds i64, i64* %a, i64 6 + %2 = load i64, i64* %arrayidx4, align 8 + %inc = add nsw i64 %2, 1 + store i64 %inc, i64* %arrayidx4, align 8 + br label %return + +if.else5: ; preds = %entry + %arrayidx6 = getelementptr inbounds i64, i64* %a, i64 6 + %3 = load i64, i64* %arrayidx6, align 8 + %cmp7 = icmp slt i64 %3, 3512 + br i1 %cmp7, label %if.then8, label %return + +if.then8: ; preds = %if.else5 + %arrayidx9 = getelementptr inbounds i64, i64* %a, i64 7 + %4 = load i64, i64* %arrayidx9, align 8 + %inc10 = add nsw i64 %4, 1 + store i64 %inc10, i64* %arrayidx9, align 8 + br label %return + +return: ; preds = %if.else5, %if.then, %if.then8, %if.then3 + %retval.0 = phi i64 [ 214748364701, %if.then3 ], [ 214748364701, %if.then8 ], [ 250148364702, %if.then ], [ 256148364704, %if.else5 ] + ret i64 %retval.0 +} +