diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,34 +10,364 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" +#include +#include +#include +#include +#include using namespace llvm; +#define DEBUG_TYPE "select-optimize" + +STATISTIC(NumSelectOptAnalyzed, + "Number of select groups considered for conversion to branch"); +STATISTIC(NumSelectConvertedExpColdOperand, + "Number of select groups converted due to expensive cold operand"); +STATISTIC(NumSelectConvertedHighPred, + "Number of select groups converted due to high-predictability"); +STATISTIC(NumSelectUnPred, + "Number of select groups not converted due to unpredictability"); + +static cl::opt ColdOperandThreshold( + "cold-operand-threshold", + cl::desc("Maximum frequency of path for an operand to be considered cold."), + cl::init(20), cl::Hidden); + namespace { class SelectOptimize : public FunctionPass { + const TargetMachine *TM = nullptr; + const TargetSubtargetInfo *TSI; + const TargetLowering *TLI = nullptr; + const TargetTransformInfo *TTI = nullptr; + const LoopInfo *LI; + DominatorTree *DT; + std::unique_ptr BFI; + std::unique_ptr BPI; + ProfileSummaryInfo *PSI; + OptimizationRemarkEmitter *ORE; + public: static char ID; + SelectOptimize() : FunctionPass(ID) { initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override {} + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + +private: + // Select groups consist of consecutive select instructions with the same + // condition. + using SelectGroup = SmallVector; + using SelectGroups = SmallVector; + + bool optimizeSelects(Function &F); + void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); + void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); + void convertProfitableSIGroups(SelectGroups &ProfSIGroups); + void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); + void findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups); + bool isConvertToBranchProfitableBase(const SmallVector &ASI); + bool hasExpensiveColdOperand(const SmallVector &ASI); + void getOneUseChain(Instruction *I, SmallVector &Chain); + bool isSelectHighlyPredictable(SelectInst *SI); + bool isExpensiveInst(Instruction *I); + bool isSelectKindSupported(SelectInst *SI); }; } // namespace char SelectOptimize::ID = 0; -INITIALIZE_PASS(SelectOptimize, "select-optimize", "Optimize selects", false, - false) + +INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } bool SelectOptimize::runOnFunction(Function &F) { + TM = &getAnalysis().getTM(); + TSI = TM->getSubtargetImpl(F); + TLI = TSI->getTargetLowering(); + + // If none of the select types is supported then skip this pass. + // This is an optimization pass. Legality issues will be handled by + // instruction selection. + if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && + !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && + !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) + return false; + + // If even a predictable select is cheap, then a branch cannot be cheaper. + if (!TLI->isPredictableSelectExpensive()) + return false; + + TTI = &getAnalysis().getTTI(F); + DT = &getAnalysis().getDomTree(); + LI = &getAnalysis().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + PSI = &getAnalysis().getPSI(); + ORE = &getAnalysis().getORE(); + + // When optimizing for size, selects are preferable over branches. + if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) + return false; + + return optimizeSelects(F); +} + +bool SelectOptimize::optimizeSelects(Function &F) { + // Determine for which select groups it is profitable converting to branches. + SelectGroups ProfSIGroups; + // Base heuristics apply only to non-loops and outer loops. + optimizeSelectsBase(F, ProfSIGroups); + // Separate heuristics for inner-most loops. + optimizeSelectsInnerLoops(F, ProfSIGroups); + + // Convert to branches the select groups that were deemed + // profitable-to-convert. + convertProfitableSIGroups(ProfSIGroups); + + // Code modified if at least one select group was converted. + return !ProfSIGroups.empty(); +} + +void SelectOptimize::optimizeSelectsBase(Function &F, + SelectGroups &ProfSIGroups) { + // Collect all the select groups. + SelectGroups SIGroups; + for (BasicBlock &BB : F) { + // Base heuristics apply only to non-loops and outer loops. + Loop *L = LI->getLoopFor(&BB); + if (L && L->isInnermost()) + continue; + collectSelectGroups(BB, SIGroups); + } + + // Determine for which select groups it is profitable converting to branches. + findProfitableSIGroupsBase(SIGroups, ProfSIGroups); +} + +void SelectOptimize::optimizeSelectsInnerLoops(Function &F, + SelectGroups &ProfSIGroups) { llvm_unreachable("Unimplemented"); } + +void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { + llvm_unreachable("Unimplemented"); +} + +void SelectOptimize::collectSelectGroups(BasicBlock &BB, + SelectGroups &SIGroups) { + BasicBlock::iterator BBIt = BB.begin(); + while (BBIt != BB.end()) { + Instruction *I = &*BBIt++; + if (SelectInst *SI = dyn_cast(I)) { + SelectGroup SIGroup; + SIGroup.push_back(SI); + while (BBIt != BB.end()) { + SelectInst *NI = dyn_cast(&*BBIt); + if (NI && SI->getCondition() == NI->getCondition()) { + SIGroup.push_back(NI); + ++BBIt; + } else { + break; + } + } + + // If the select type is not supported, no point optimizing it. + // Instruction selection will take care of it. + if (!isSelectKindSupported(SI)) + continue; + + SIGroups.push_back(SIGroup); + } + } +} + +void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : SIGroups) { + ++NumSelectOptAnalyzed; + if (isConvertToBranchProfitableBase(ASI)) + ProfSIGroups.push_back(ASI); + } +} + +bool SelectOptimize::isConvertToBranchProfitableBase( + const SmallVector &ASI) { + SelectInst *SI = ASI.front(); + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); + + // If unpredictable, branch form is more profitable. + if (SI->getMetadata(LLVMContext::MD_unpredictable)) { + ++NumSelectUnPred; + ORmiss << "Not converted to branch because of unpredictable branch. "; + ORE->emit(ORmiss); + return false; + } + + // If highly predictable, branch form is more profitable. + if (isSelectHighlyPredictable(SI)) { + ++NumSelectConvertedHighPred; + OR << "Converted to branch because of highly predictable branch. "; + ORE->emit(OR); + return true; + } + + // Look for expensive operands in the cold operand's (if any) dependence + // chain of any of the selects in the group. + if (hasExpensiveColdOperand(ASI)) { + ++NumSelectConvertedExpColdOperand; + OR << "Converted to branch because of expensive cold operand."; + ORE->emit(OR); + return true; + } + + ORmiss << "Not profitable to convert to branch (base heuristic)."; + ORE->emit(ORmiss); + return false; +} + +bool SelectOptimize::hasExpensiveColdOperand( + const SmallVector &ASI) { + bool ColdOperand = false; + uint64_t TrueWeight, FalseWeight; + if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Min = std::min(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + // Is there a path with frequency 100 * Min; + } + if (!ColdOperand) + return false; + // Check if the cold path's dependence chain has an expensive operation for + // any of the selects of the group. + for (SelectInst *SI : ASI) { + Instruction *ColdI = nullptr; + if (TrueWeight < FalseWeight) + ColdI = dyn_cast(SI->getTrueValue()); + else + ColdI = dyn_cast(SI->getFalseValue()); + if (ColdI && ColdI->hasOneUse()) { + SmallVector ColdChain; + getOneUseChain(ColdI, ColdChain); + for (auto *ColdII : ColdChain) + if (isExpensiveInst(ColdII)) + return true; + } + } + return false; +} + +void SelectOptimize::getOneUseChain(Instruction *I, + SmallVector &Chain) { + std::queue Worklist; + Worklist.push(I); + SmallPtrSet Visited; + const Loop *L = LI->getLoopFor(I->getParent()); + while (!Worklist.empty()) { + Instruction *II = Worklist.front(); + Worklist.pop(); + + if (Visited.count(II)) + continue; + Visited.insert(II); + + if (I != II && !II->hasOneUse()) + continue; + + // Avoid considering instructions in outer or independent loops, or non-loop + // instructions (instructions with potentially much less frequency than the + // operand of the select). + const Loop *LII = LI->getLoopFor(II->getParent()); + if (L && L != LII && (!LII || !L->contains(LII))) + continue; + + // Avoid phi nodes that are not part of a loop-carried dependence. + // Could source from a cold basic block. + if (isa(II)) { + if (!LII) + continue; + if (LII->getHeader() != II->getParent()) + continue; + } + + Chain.push_back(II); + for (unsigned k = 0; k < II->getNumOperands(); ++k) + if (auto *OpI = dyn_cast(II->getOperand(k))) + Worklist.push(OpI); + } +} + +bool SelectOptimize::isSelectHighlyPredictable(SelectInst *SI) { + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + if (Sum != 0) { + auto Probability = BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TTI->getPredictableBranchThreshold()) + return true; + } + } + return false; +} + +bool SelectOptimize::isExpensiveInst(Instruction *I) { + return TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency) >= + TargetTransformInfo::TCC_Expensive; +} + +bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + return TLI->isSelectSupported(SelectKind); +}