Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -103,6 +103,7 @@ void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); +void initializeControlHeightReductionLegacyPassPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCrossDSOCFIPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -88,6 +88,7 @@ (void) llvm::createCalledValuePropagationPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); + (void) llvm::createControlHeightReductionLegacyPass(); (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); Index: include/llvm/Transforms/Instrumentation/ControlHeightReduction.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Instrumentation/ControlHeightReduction.h @@ -0,0 +1,32 @@ +//===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass merges conditional blocks of code and reduces the number of +// conditional branches in the hot paths based on profiles. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class ControlHeightReductionPass : + public PassInfoMixin { +public: + ControlHeightReductionPass(); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_CONTROLHEIGHTREDUCTION_H Index: include/llvm/Transforms/Utils.h =================================================================== --- include/llvm/Transforms/Utils.h +++ include/llvm/Transforms/Utils.h @@ -113,6 +113,13 @@ /// This function returns a new pass that downgrades the debug info in the /// module to line tables only. ModulePass *createStripNonLineTableDebugInfoPass(); + +//===----------------------------------------------------------------------===// +// +// ControlHeightReudction - Merges conditional blocks of code and reduces the +// number of conditional branches in the hot paths based on profiles. +// +FunctionPass *createControlHeightReductionLegacyPass(); } #endif Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -87,6 +87,7 @@ #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" +#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" @@ -193,6 +194,10 @@ static Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); +static cl::opt + EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden, + cl::desc("Enable control height reduction optimization (CHR)")); + static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { case PassBuilder::O0: @@ -486,6 +491,10 @@ FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); + if (EnableCHR && Level == O3 && PGOOpt && + (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty())) + FPM.addPass(ControlHeightReductionPass()); + return FPM; } Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -148,6 +148,7 @@ FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) +FUNCTION_PASS("chr", ControlHeightReductionPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) FUNCTION_PASS("div-rem-pairs", DivRemPairsPass()) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -152,6 +152,10 @@ "enable-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN sinking pass (default = off)")); +static cl::opt + EnableCHR("enable-chr", cl::init(true), cl::Hidden, + cl::desc("Enable control height reduction optimization (CHR)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -411,6 +415,10 @@ // Clean up after everything. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); + + if (EnableCHR && OptLevel >= 3 && + (!PGOInstrUse.empty() || !PGOSampleUse.empty())) + MPM.add(createControlHeightReductionLegacyPass()); } void PassManagerBuilder::populateModulePassManager( Index: lib/Transforms/Instrumentation/CMakeLists.txt =================================================================== --- lib/Transforms/Instrumentation/CMakeLists.txt +++ lib/Transforms/Instrumentation/CMakeLists.txt @@ -2,6 +2,7 @@ AddressSanitizer.cpp BoundsChecking.cpp CGProfile.cpp + ControlHeightReduction.cpp DataFlowSanitizer.cpp GCOVProfiling.cpp MemorySanitizer.cpp Index: lib/Transforms/Instrumentation/ControlHeightReduction.cpp =================================================================== --- /dev/null +++ lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -0,0 +1,1961 @@ +//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass merges conditional blocks of code and reduces the number of +// conditional branches in the hot paths based on profiles. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Transforms/Scalar.h" + +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "chr" + +#define CHR_DEBUG(X) LLVM_DEBUG(X) + +static cl::opt ForceCHR("force-chr", cl::init(false), cl::Hidden, + cl::desc("Apply CHR for all functions")); + +static cl::opt CHRBiasThreshold( + "chr-bias-threshold", cl::init(0.99), cl::Hidden, + cl::desc("CHR considers a branch bias greater than this ratio as biased")); + +static cl::opt CHRMergeThreshold( + "chr-merge-threshold", cl::init(2), cl::Hidden, + cl::desc("CHR merges a group of N branches/selects where N >= this value")); + +static cl::opt CHRModuleList( + "chr-module-list", cl::init(""), cl::Hidden, + cl::desc("Specify file to retrieve the list of modules to apply CHR to")); + +static cl::opt CHRFunctionList( + "chr-function-list", cl::init(""), cl::Hidden, + cl::desc("Specify file to retrieve the list of functions to apply CHR to")); + +static StringSet<> CHRModules; +static StringSet<> CHRFunctions; + +static void ParseCHRFilterFiles() { + if (!CHRModuleList.empty()) { + auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); + if (!FileOrErr) { + errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; + std::exit(1); + } + StringRef Buf = FileOrErr->get()->getBuffer(); + SmallVector Lines; + Buf.split(Lines, '\n'); + for (StringRef Line : Lines) { + Line = Line.trim(); + if (!Line.empty()) + CHRModules.insert(Line); + } + } + if (!CHRFunctionList.empty()) { + auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); + if (!FileOrErr) { + errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; + std::exit(1); + } + StringRef Buf = FileOrErr->get()->getBuffer(); + SmallVector Lines; + Buf.split(Lines, '\n'); + for (StringRef Line : Lines) { + Line = Line.trim(); + if (!Line.empty()) + CHRFunctions.insert(Line); + } + } +} + +namespace { +class ControlHeightReductionLegacyPass : public FunctionPass { +public: + static char ID; + + ControlHeightReductionLegacyPass() : FunctionPass(ID) { + initializeControlHeightReductionLegacyPassPass( + *PassRegistry::getPassRegistry()); + ParseCHRFilterFiles(); + } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + } +}; +} // end anonymous namespace + +char ControlHeightReductionLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, + "chr", + "Reduce control height in the hot paths", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) +INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, + "chr", + "Reduce control height in the hot paths", + false, false) + +FunctionPass *llvm::createControlHeightReductionLegacyPass() { + return new ControlHeightReductionLegacyPass(); +} + +namespace { + +struct CHRStats { + CHRStats() : NumBranches(0), NumBranchesDelta(0) {} + void print(raw_ostream &OS) const { + OS << "CHRStats: NumBranches=" << NumBranches + << ", NumBranchesDelta=" << NumBranchesDelta; + } + uint64_t NumBranches; // The original number of conditional branches / + // selects + uint64_t NumBranchesDelta; // The decrease of the number of conditional + // branches / selects in the hot paths due to CHR. +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) { + Stats.print(OS); + return OS; +} + +// RegInfo - some properties of a Region. +struct RegInfo { + RegInfo() : R(nullptr), HasBranch(false) {} + RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} + Region *R; + bool HasBranch; + SmallVector Selects; +}; + +typedef DenseMap> HoistStopMapTy; + +// CHRScope - a sequence of regions to CHR together. It corresponds to a +// sequence of conditional blocks. It can have subscopes which correspond to +// nested conditional blocks. Nested CHRScopes form a tree. +class CHRScope { + public: + CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { + assert(RI.R && "Null RegionIn"); + RegInfos.push_back(RI); + } + + Region *getParentRegion() { + assert(RegInfos.size() > 0 && "Empty CHRScope"); + Region *Parent = RegInfos[0].R->getParent(); + assert(Parent && "Unexpected to call this on the top-level region"); + return Parent; + } + + BasicBlock *getEntryBlock() { + assert(RegInfos.size() > 0 && "Empty CHRScope"); + return RegInfos.front().R->getEntry(); + } + + BasicBlock *getExitBlock() { + assert(RegInfos.size() > 0 && "Empty CHRScope"); + return RegInfos.back().R->getExit(); + } + + bool appendable(CHRScope *Next) { + // The next scope is appendable only if this scope is directly connected to + // it (which implies it post-dominates this scope) and this scope dominates + // it (no edge to the next scope outside this scope). + BasicBlock *NextEntry = Next->getEntryBlock(); + if (getExitBlock() != NextEntry) + // Not directly connected. + return false; + Region *LastRegion = RegInfos.back().R; + for (BasicBlock *Pred : predecessors(NextEntry)) + if (!LastRegion->contains(Pred)) + // There's an edge going into the entry of the next scope from outside + // of this scope. + return false; + return true; + } + + void append(CHRScope *Next) { + assert(RegInfos.size() > 0 && "Empty CHRScope"); + assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); + assert(getParentRegion() == Next->getParentRegion() && + "Must be siblings"); + assert(getExitBlock() == Next->getEntryBlock() && + "Must be adjacent"); + for (RegInfo &RI : Next->RegInfos) + RegInfos.push_back(RI); + for (CHRScope *Sub : Next->Subs) + Subs.push_back(Sub); + } + + void addSub(CHRScope *SubIn) { +#ifndef NDEBUG + bool is_child = false; + for (RegInfo &RI : RegInfos) + if (RI.R == SubIn->getParentRegion()) { + is_child = true; + break; + } + assert(is_child && "Must be a child"); +#endif + Subs.push_back(SubIn); + } + + // Split this scope at the boundary region into two, which will belong to the + // tail and returns the tail. + CHRScope *split(Region *Boundary) { + assert(Boundary && "Boundary null"); + assert(RegInfos.begin()->R != Boundary && + "Can't be split at beginning"); + auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), + [&Boundary](const RegInfo& RI) { + return Boundary == RI.R; + }); + if (BoundaryIt == RegInfos.end()) + return nullptr; + SmallVector TailRegInfos; + SmallVector TailSubs; + TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); + RegInfos.resize(BoundaryIt - RegInfos.begin()); + DenseSet TailRegionSet; + for (RegInfo &RI : TailRegInfos) + TailRegionSet.insert(RI.R); + for (auto It = Subs.begin(); It != Subs.end(); ) { + CHRScope *Sub = *It; + assert(Sub && "null Sub"); + Region *Parent = Sub->getParentRegion(); + if (TailRegionSet.count(Parent)) { + TailSubs.push_back(Sub); + It = Subs.erase(It); + } else { + assert(std::find_if(RegInfos.begin(), RegInfos.end(), + [&Parent](const RegInfo& RI) { + return Parent == RI.R; + }) != RegInfos.end() && + "Must be in head"); + ++It; + } + } + assert(HoistStopMap.empty() && "MapHoistStops must be empty"); + return new CHRScope(TailRegInfos, TailSubs); + } + + bool contains(Instruction *I) const { + BasicBlock *Parent = I->getParent(); + for (const RegInfo &RI : RegInfos) + if (RI.R->contains(Parent)) + return true; + return false; + } + + void print(raw_ostream &OS) const; + + SmallVector RegInfos; // Regions that belong to this scope + SmallVector Subs; // Subscopes. + + // The instruction at which to insert the CHR conditional branch (and hoist + // the dependent condition values). + Instruction *BranchInsertPoint; + + // True-biased and false-biased regions (conditional blocks), + // respectively. Used only for the outermost scope and includes regions in + // subscopes. The rest are unbiased. + DenseSet TrueBiasedRegions; + DenseSet FalseBiasedRegions; + // Among the biased regions, the regions that get CHRed. + SmallVector CHRRegions; + + // True-biased and false-biased selects, respectively. Used only for the + // outermost scope and includes ones in subscopes. + DenseSet TrueBiasedSelects; + DenseSet FalseBiasedSelects; + + // Map from one of the above regions to the instructions to stop + // hoisting instructions at through use-def chains. + HoistStopMapTy HoistStopMap; + + private: + CHRScope(SmallVector &RegInfosIn, + SmallVector &SubsIn) + : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { + Scope.print(OS); + return OS; +} + +class CHR { + public: + CHR(Function &Fin, DominatorTree &DTin, ProfileSummaryInfo &PSIin, + RegionInfo &RIin) + : F(Fin), DT(DTin), PSI(PSIin), RI(RIin) {} + + ~CHR() { + for (CHRScope *Scope : Scopes) { + delete Scope; + } + } + + bool run(); + + private: + // See the comments in CHR::run() for the high level flow of the algorithm and + // what the following functions do. + + void findScopes(SmallVectorImpl &Output) { + Region *R = RI.getTopLevelRegion(); + CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); + if (Scope) { + Output.push_back(Scope); + } + } + CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, + SmallVectorImpl &Scopes); + CHRScope *findScope(Region *R); + void checkScopeHoistable(CHRScope *Scope); + + void splitScopes(SmallVectorImpl &Input, + SmallVectorImpl &Output); + SmallVector splitScope(CHRScope *Scope, + CHRScope *Outer, + DenseSet *OuterConditionValues, + Instruction *OuterInsertPoint, + SmallVectorImpl &Output, + DenseSet &Unhoistables); + + void setPerScopeBias(SmallVectorImpl &Scopes); + void setPerScopeBias(CHRScope *Scope, CHRScope *OutermostScope); + + void filterScopes(SmallVectorImpl &Input, + SmallVectorImpl &Output); + + void setCHRRegions(SmallVectorImpl &Input, + SmallVectorImpl &Output); + void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); + + void sortScopes(SmallVectorImpl &Input, + SmallVectorImpl &Output); + + void transformScopes(SmallVectorImpl &CHRScopes); + void transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs); + void cloneScopeBlocks(CHRScope *Scope, + BasicBlock *PreEntryBlock, + BasicBlock *ExitBlock, + Region *LastRegion, + ValueToValueMapTy &VMap); + BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, + BasicBlock *EntryBlock, + BasicBlock *NewEntryBlock, + ValueToValueMapTy &VMap); + void fixupBranchesAndSelects(CHRScope *Scope, + BasicBlock *PreEntryBlock, + BranchInst *MergedBR); + void fixupBranch(Region *R, + CHRScope *Scope, + BasicBlock *PreEntryBlock, + Value *&MergedCondition, double &CHRBranchBias); + void fixupSelect(SelectInst* SI, + CHRScope *Scope, + BasicBlock *PreEntryBlock, + Value *&MergedCondition, double &CHRBranchBias); + void addToMergedCondition(bool IsTrueBiased, Value *Cond, + Instruction *BranchOrSelect, + CHRScope *Scope, + BasicBlock *PreEntryBlock, + Value *&MergedCondition); + + Function &F; + DominatorTree &DT; + ProfileSummaryInfo &PSI; + RegionInfo &RI; + CHRStats Stats; + + // All the true-biased regions in the function + DenseSet TrueBiasedRegionsGlobal; + // All the false-biased regions in the function + DenseSet FalseBiasedRegionsGlobal; + // All the true-biased selects in the function + DenseSet TrueBiasedSelectsGlobal; + // All the false-biased selects in the function + DenseSet FalseBiasedSelectsGlobal; + // A map from biased regions to their branch bias + DenseMap BranchBiasMap; + // A map from biased selects to their branch bias + DenseMap SelectBiasMap; + // All the scopes. + DenseSet Scopes; +}; + +} // end anonymous namespace + +static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { + if (ForceCHR) + return true; + + if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { + if (CHRModules.count(F.getParent()->getName())) + return true; + StringRef Name = F.getName(); + if (CHRFunctions.count(Name)) + return true; + const char* DemangledName = nullptr; + int Status = -1; + DemangledName = abi::__cxa_demangle(Name.str().c_str(), + nullptr, nullptr, &Status); + return DemangledName && CHRFunctions.count(DemangledName); + } + + assert(PSI.hasProfileSummary() && "Empty PSI?"); + return PSI.isFunctionEntryHot(&F); +} + +static void dumpIR(Function &F, const char *Label, CHRStats *Stats) { + std::string Name = F.getName().str(); + const char *DemangledName = nullptr; + int Status = -1; + DemangledName = abi::__cxa_demangle(Name.c_str(), + nullptr, nullptr, &Status); + if (DemangledName == nullptr) { + DemangledName = ""; + } + std::string ModuleName = F.getParent()->getName().str(); + CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " + << Name << "\n"); + if (Stats) + CHR_DEBUG(dbgs() << "Stats " << *Stats << "\n"); + CHR_DEBUG(F.dump()); +} + + +void CHRScope::print(raw_ostream &OS) const { + assert(RegInfos.size() > 0 && "Empty CHRScope"); + OS << "CHRScope["; + OS << RegInfos.size() << ", Regions["; + for (const RegInfo &RI : RegInfos) { + OS << RI.R->getNameStr(); + if (RI.HasBranch) + OS << " B"; + if (RI.Selects.size() > 0) + OS << " S" << RI.Selects.size(); + OS << ", "; + } + if (RegInfos[0].R->getParent()) { + OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); + } else { + // top level region + OS << "]"; + } + OS << ", Subs["; + for (CHRScope *Sub : Subs) { + OS << *Sub << ", "; + } + OS << "]]"; +} + +// Return true if the given instruction type can be hoisted by CHR. +static bool isHoistableInstructionType(Instruction *I) { + return isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I); +} + +// Return true if the given instruction can be hoisted by CHR. +static bool isHoistable(Instruction *I, DominatorTree &DT) { + if (!isHoistableInstructionType(I)) + return false; + return isSafeToSpeculativelyExecute(I, nullptr, &DT); +} + +// Recursively traverse the use-def chains of the given value and return a set +// of the unhoistable base values defined within the scope (excluding the +// first-region entry block) or the (hoistable or unhoistable) base values that +// are defined outside (including the first-region entry block) of the +// scope. The returned set doesn't include constants. +static std::set getBaseValues(Value *V, + DominatorTree &DT) { + std::set Result; + if (auto *I = dyn_cast(V)) { + // We don't stop at a block that's not in the Scope because we would miss some + // instructions that are based on the same base values if we stop there. + if (!isHoistable(I, DT)) { + Result.insert(I); + return Result; + } + // I is hoistable above the Scope. + for (Value *Op : I->operands()) { + std::set OpResult = getBaseValues(Op, DT); + Result.insert(OpResult.begin(), OpResult.end()); + } + return Result; + } + if (isa(V)) { + Result.insert(V); + return Result; + } + // We don't include others like constants because those won't lead to any + // chance of folding of conditions (eg two bit checks merged into one check) + // after CHR. + return Result; // empty +} + +// Return true if V is already hoisted or can be hoisted (along with its +// operands) above the insert point. When it returns true and HoistStops is +// non-null, the instructions to stop hoisting at through the use-def chains are +// inserted into HoistStops. +static bool +checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, + DenseSet &Unhoistables, + DenseSet *HoistStops) { + assert(InsertPoint && "Null InsertPoint"); + if (auto *I = dyn_cast(V)) { + assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); + assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); + if (Unhoistables.count(I)) { + // Don't hoist if they are not to be hoisted. + return false; + } + if (DT.dominates(I, InsertPoint)) { + // We are already above the insert point. Stop here. + if (HoistStops) + HoistStops->insert(I); + return true; + } + // We aren't not above the insert point, check if we can hoist it above the + // insert point. + if (isHoistable(I, DT)) { + // Check operands first. + DenseSet OpsHoistStops; + bool AllOpsHoisted = true; + for (Value *Op : I->operands()) { + if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { + AllOpsHoisted = false; + break; + } + } + if (AllOpsHoisted) { + CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); + if (HoistStops) + HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); + return true; + } + } + return false; + } + // Non-instructions are considered hoistable. + return true; +} + +// Returns true and sets the true probability and false probability of an +// MD_prof metadata if it's well-formed. +static bool CheckMDProf(MDNode *MD, double &TrueProb, double &FalseProb) { + if (!MD) return false; + MDString *MDName = cast(MD->getOperand(0)); + if (MDName->getString() != "branch_weights" || + MD->getNumOperands() != 3) + return false; + ConstantInt *TrueWeight = mdconst::extract(MD->getOperand(1)); + ConstantInt *FalseWeight = mdconst::extract(MD->getOperand(2)); + if (!TrueWeight || !FalseWeight) + return false; + double TrueWeightD = TrueWeight->getValue().roundToDouble(); + double FalseWeightD = FalseWeight->getValue().roundToDouble(); + TrueProb = TrueWeightD / (TrueWeightD + FalseWeightD); + FalseProb = FalseWeightD / (TrueWeightD + FalseWeightD); + return true; +} + +// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= +// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= +// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return +// false. +template +bool CheckBias(K *Key, double TrueProb, double FalseProb, + S &TrueSet, S &FalseSet, M &BiasMap) { + if (TrueProb >= CHRBiasThreshold) { + TrueSet.insert(Key); + BiasMap[Key] = TrueProb; + return true; + } else if (FalseProb >= CHRBiasThreshold) { + FalseSet.insert(Key); + BiasMap[Key] = FalseProb; + return true; + } + return false; +} + +// Returns true and insert a region into the right biased set and the map if the +// branch of the region is biased. +static bool CheckBiasedBranch(BranchInst *BI, Region *R, + DenseSet &TrueBiasedRegionsGlobal, + DenseSet &FalseBiasedRegionsGlobal, + DenseMap &BranchBiasMap) { + if (!BI->isConditional()) + return false; + double ThenProb = 0, ElseProb = 0; + if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof), + ThenProb, ElseProb)) + return false; + BasicBlock *IfThen = BI->getSuccessor(0); + BasicBlock *IfElse = BI->getSuccessor(1); + assert((IfThen == R->getExit() || IfElse == R->getExit()) && + IfThen != IfElse && + "Invariant from findScopes"); + if (IfThen == R->getExit()) { + // Swap them so that IfThen/ThenProb means going into the conditional code + // and IfElse/ElseProb means skipping it. + std::swap(IfThen, IfElse); + std::swap(ThenProb, ElseProb); + } + CHR_DEBUG(dbgs() << "BI " << *BI << " "); + CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); + CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); + return CheckBias(R, ThenProb, ElseProb, + TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, + BranchBiasMap); +} + +// Returns true and insert a select into the right biased set and the map if the +// select is biased. +static bool CheckBiasedSelect( + SelectInst *SI, Region *R, + DenseSet &TrueBiasedSelectsGlobal, + DenseSet &FalseBiasedSelectsGlobal, + DenseMap &SelectBiasMap) { + double TrueProb = 0, FalseProb = 0; + if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof), + TrueProb, FalseProb)) + return false; + CHR_DEBUG(dbgs() << "SI " << *SI << " "); + CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); + CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); + return CheckBias(SI, TrueProb, FalseProb, + TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, + SelectBiasMap); +} + +// Returns the instruction at which to hoist the dependent condition values and +// insert the CHR branch for a region. This is the terminator branch in the +// entry block or the first select in the entry block, if any. +static Instruction* getBranchInsertPoint(RegInfo &RI) { + Region *R = RI.R; + BasicBlock *EntryBB = R->getEntry(); + // The hoist point is by default the terminator of the entry block, which is + // the same as the branch instruction if RI.HasBranch is true. + Instruction *HoistPoint = EntryBB->getTerminator(); + for (SelectInst *SI : RI.Selects) { + if (SI->getParent() == EntryBB) { + // Pick the first select in Selects in the entry block. Note Selects is + // sorted in the instruction order within a block (asserted below). + HoistPoint = SI; + break; + } + } + assert(HoistPoint && "Null HoistPoint"); +#ifndef NDEBUG + // Check that HoistPoint is the first one in Selects in the entry block, + // if any. + DenseSet EntryBlockSelectSet; + for (SelectInst *SI : RI.Selects) { + if (SI->getParent() == EntryBB) { + EntryBlockSelectSet.insert(SI); + } + } + for (Instruction &I : *EntryBB) { + if (EntryBlockSelectSet.count(&I) > 0) { + assert(&I == HoistPoint && + "HoistPoint must be the first one in Selects"); + break; + } + } +#endif + return HoistPoint; +} + +// Find a CHR scope in the given region. +CHRScope * CHR::findScope(Region *R) { + CHRScope *Result = nullptr; + BasicBlock *Entry = R->getEntry(); + BasicBlock *Exit = R->getExit(); // null if top level. + assert(Entry && "Entry must not be null"); + assert((Exit == nullptr) == (R->isTopLevelRegion()) && + "Only top level region has a null exit"); + if (Entry) + CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); + else + CHR_DEBUG(dbgs() << "Entry null\n"); + if (Exit) + CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); + else + CHR_DEBUG(dbgs() << "Exit null\n"); + // Exclude cases where Entry is part of a subregion (hence it doesn't belong + // to this region). + bool EntryInSubregion = RI.getRegionFor(Entry) != R; + // Exclude loops + bool loopy = false; + for (BasicBlock *Pred : predecessors(Entry)) { + if (R->contains(Pred)) { + loopy = true; + break; + } + } + if (Exit && !loopy) { + auto *BI = dyn_cast(Entry->getTerminator()); + if (BI) + CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); + else + CHR_DEBUG(dbgs() << "BI null\n"); + if (BI && BI->isConditional()) { + BasicBlock *S0 = BI->getSuccessor(0); + BasicBlock *S1 = BI->getSuccessor(1); + CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); + CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); + if (S0 != S1 && (S0 == Exit || S1 == Exit)) { + assert(!EntryInSubregion && + "Must not find an if-then when Entry is in a subregion"); + RegInfo RI(R); + RI.HasBranch = CheckBiasedBranch( + BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, + BranchBiasMap); + Result = new CHRScope(RI); + Scopes.insert(Result); + CHR_DEBUG(dbgs() << "Found a region with a branch\n"); + ++Stats.NumBranches; + } + } + } + if (!EntryInSubregion && !loopy) { + SmallVector Selects; + // Find Selects in the direct child blocks of R. + for (RegionNode *E : R->elements()) { + if (E->isSubRegion()) + continue; + // This returns the basic block of E if E is a direct child of R (not a + // subregion.) + BasicBlock *BB = E->getEntry(); + // Need to push in the order to make it easier to find the first Select + // later. + for (Instruction &I : *BB) { + if (auto *SI = dyn_cast(&I)) { + Selects.push_back(SI); + ++Stats.NumBranches; + } + } + } + if (Selects.size() > 0) { + if (!Result) { + CHR_DEBUG(dbgs() << "Found a select-only region\n"); + RegInfo RI(R); + for (auto *SI : Selects) { + if (CheckBiasedSelect(SI, R, + TrueBiasedSelectsGlobal, + FalseBiasedSelectsGlobal, + SelectBiasMap)) + RI.Selects.push_back(SI); + } + Result = new CHRScope(RI); + Scopes.insert(Result); + } else { + CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); + for (auto *SI : Selects) { + if (CheckBiasedSelect(SI, Result->RegInfos[0].R, + TrueBiasedSelectsGlobal, + FalseBiasedSelectsGlobal, + SelectBiasMap)) + Result->RegInfos[0].Selects.push_back(SI); + } + } + } + } + + if (Result) { + checkScopeHoistable(Result); + } + return Result; +} + +// Check that any of the branch and the selects in the region could be +// hoisted above the the CHR branch insert point (the most dominating of +// them, either the branch (at the end of the first block) or the first +// select in the first block). If the branch can't be hoisted, drop the +// selects in the first blocks. +void CHR::checkScopeHoistable(CHRScope *Scope) { + RegInfo &RI = Scope->RegInfos[0]; + Region *R = RI.R; + BasicBlock *EntryBB = R->getEntry(); + auto *Branch = RI.HasBranch ? + cast(EntryBB->getTerminator()) : nullptr; + SmallVector &Selects = RI.Selects; + if (RI.HasBranch || !Selects.empty()) { + Instruction *InsertPoint = getBranchInsertPoint(RI); + CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); + // Avoid a dependence from a select or a branch to a(nother) + // select. Note no instruction can't depend on a branch. + DenseSet Unhoistables; + for (SelectInst *SI : Selects) { + Unhoistables.insert(SI); + } + // Remove Selects that can't be hoisted. + for (auto it = Selects.begin(); it != Selects.end(); ) { + SelectInst *SI = *it; + if (SI == InsertPoint) { + ++it; + continue; + } + bool IsHoistable = true; + IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, + DT, Unhoistables, nullptr); + if (!IsHoistable) { + CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); + it = Selects.erase(it); + Unhoistables.erase(SI); + } else + ++it; + } + InsertPoint = getBranchInsertPoint(RI); + CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); + if (RI.HasBranch && InsertPoint != Branch) { + bool IsHoistable = true; + IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, + DT, Unhoistables, nullptr); + if (!IsHoistable) { + // If the branch isn't hoistable, drop the selects in the entry + // block, preferring the branch, which makes the branch the hoist + // point. + assert(InsertPoint != Branch && "Branch must not be the hoist point"); + CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); + CHR_DEBUG( + for (SelectInst *SI : Selects) { + dbgs() << "SI " << *SI << "\n"; + }); + Selects.erase(std::remove_if(Selects.begin(), Selects.end(), + [EntryBB](SelectInst *SI) { + return SI->getParent() == EntryBB; + }), Selects.end()); + Unhoistables.clear(); + InsertPoint = Branch; + } + } + CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); +#ifndef NDEBUG + if (RI.HasBranch) { + assert(!DT.dominates(Branch, InsertPoint) && + "Branch can't be already above the hoist point"); + assert(checkHoistValue(Branch->getCondition(), InsertPoint, + DT, Unhoistables, nullptr) && + "checkHoistValue for branch"); + } + for (auto *SI : Selects) { + assert(!DT.dominates(SI, InsertPoint) && + "SI can't be already above the hoist point"); + assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, + Unhoistables, nullptr) && + "checkHoistValue for selects"); + } + CHR_DEBUG(dbgs() << "Result\n"); + if (RI.HasBranch) { + CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); + } + for (auto *SI : Selects) { + CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); + } +#endif + } +} + +CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, + SmallVectorImpl &Scopes) { + CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); + CHRScope *Result = findScope(R); + // Visit subscopes. + if (R->begin() != R->end()) { + CHRScope *ConsecutiveSubscope = nullptr; + SmallVector Subscopes; + for (auto It = R->begin(); It != R->end(); ++It) { + const std::unique_ptr &SubR = *It; + auto Next_It = std::next(It); + Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr; + CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() << "\n"); + CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); + if (SubCHRScope) { + CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); + } else { + CHR_DEBUG(dbgs() << "Subregion Scope null\n"); + } + if (SubCHRScope) { + if (!ConsecutiveSubscope) + ConsecutiveSubscope = SubCHRScope; + else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { + Subscopes.push_back(ConsecutiveSubscope); + ConsecutiveSubscope = SubCHRScope; + } else + ConsecutiveSubscope->append(SubCHRScope); + } else { + if (ConsecutiveSubscope) { + Subscopes.push_back(ConsecutiveSubscope); + } + ConsecutiveSubscope = nullptr; + } + } + if (ConsecutiveSubscope) { + Subscopes.push_back(ConsecutiveSubscope); + } + for (CHRScope *Sub : Subscopes) { + if (Result) { + // Combine it with the parent. + Result->addSub(Sub); + } else { + // Push Subscopes as they won't be combined with the parent. + Scopes.push_back(Sub); + } + } + } + return Result; +} + +static DenseSet getCHRConditionValuesForRegion(RegInfo &RI) { + DenseSet ConditionValues; + if (RI.HasBranch) { + auto *BI = cast(RI.R->getEntry()->getTerminator()); + ConditionValues.insert(BI->getCondition()); + } + for (SelectInst *SI : RI.Selects) { + ConditionValues.insert(SI->getCondition()); + } + return ConditionValues; +} + + +// Determine whether to split a scope depending on the sets of the branch +// condition values of the previous region and the current region. We split +// (return true) it if 1) the condition values of the inner/lower scope can't be +// hoisted up to the outer/upper scope, or 2) the two sets of the condition +// values have an empty intersection (because the combined branch conditions +// won't probably lead to a simpler combined condition). +static bool shouldSplit(Instruction *InsertPoint, + DenseSet &PrevConditionValues, + DenseSet &ConditionValues, + DominatorTree &DT, + DenseSet &Unhoistables) { + CHR_DEBUG( + dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; + for (Value *V : PrevConditionValues) { + dbgs() << *V << ", "; + } + dbgs() << " ConditionValues "; + for (Value *V : ConditionValues) { + dbgs() << *V << ", "; + } + dbgs() << "\n"); + assert(InsertPoint && "Null InsertPoint"); + // If any of Bases isn't hoistable to the hoist point, split. + for (Value *V : ConditionValues) { + if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { + CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); + return true; // Not hoistable, split. + } + } + // If PrevConditionValues or ConditionValues is empty, don't split to avoid + // unnecessary splits at scopes with no branch/selects. If + // PrevConditionValues and ConditionValues don't intersect at all, split. + if (!PrevConditionValues.empty() && !ConditionValues.empty()) { + // Use std::set as DenseSet doesn't work with set_intersection. + std::set PrevBases, Bases; + for (Value *V : PrevConditionValues) { + std::set BaseValues = getBaseValues(V, DT); + PrevBases.insert(BaseValues.begin(), BaseValues.end()); + } + for (Value *V : ConditionValues) { + std::set BaseValues = getBaseValues(V, DT); + Bases.insert(BaseValues.begin(), BaseValues.end()); + } + CHR_DEBUG( + dbgs() << "PrevBases "; + for (Value *V : PrevBases) { + dbgs() << *V << ", "; + } + dbgs() << " Bases "; + for (Value *V : Bases) { + dbgs() << *V << ", "; + } + dbgs() << "\n"); + std::set Intersection; + std::set_intersection(PrevBases.begin(), PrevBases.end(), + Bases.begin(), Bases.end(), + std::inserter(Intersection, Intersection.begin())); + if (Intersection.empty()) { + // Empty intersection, split. + CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); + return true; + } + } + CHR_DEBUG(dbgs() << "No split\n"); + return false; // Don't split. +} + +static void GetSelectsInScope(CHRScope *Scope, + DenseSet &Output) { + for (RegInfo &RI : Scope->RegInfos) { + for (SelectInst *SI : RI.Selects) { + Output.insert(SI); + } + } + for (CHRScope *Sub : Scope->Subs) { + GetSelectsInScope(Sub, Output); + } +} + +void CHR::splitScopes(SmallVectorImpl &Input, + SmallVectorImpl &Output) { + for (CHRScope *Scope : Input) { + assert(!Scope->BranchInsertPoint && + "BranchInsertPoint must not be set"); + DenseSet Unhoistables; + GetSelectsInScope(Scope, Unhoistables); + splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); + } +#ifndef NDEBUG + for (CHRScope *Scope : Output) { + assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); + } +#endif +} + +SmallVector CHR::splitScope( + CHRScope *Scope, + CHRScope *Outer, + DenseSet *OuterConditionValues, + Instruction *OuterInsertPoint, + SmallVectorImpl &Output, + DenseSet &Unhoistables) { + if (Outer) { + assert(OuterConditionValues && "Null OuterConditionValues"); + assert(OuterInsertPoint && "Null OuterInsertPoint"); + } + bool PrevSplitFromOuter = true; + DenseSet PrevConditionValues; + Instruction *PrevInsertPoint = nullptr; + SmallVector Splits; + SmallVector SplitsSplitFromOuter; + SmallVector, 8> SplitsConditionValues; + SmallVector SplitsInsertPoints; + SmallVector RegInfos(Scope->RegInfos); // Copy + for (RegInfo &RI : RegInfos) { + Instruction *InsertPoint = getBranchInsertPoint(RI); + DenseSet ConditionValues = getCHRConditionValuesForRegion(RI); + CHR_DEBUG( + dbgs() << "ConditionValues "; + for (Value *V : ConditionValues) { + dbgs() << *V << ", "; + } + dbgs() << "\n"); + if (RI.R == RegInfos[0].R) { + // First iteration. Check to see if we should split from the outer. + if (Outer) { + CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); + CHR_DEBUG(dbgs() << "Should split from outer at " + << RI.R->getNameStr() << "\n"); + if (shouldSplit(OuterInsertPoint, *OuterConditionValues, + ConditionValues, DT, Unhoistables)) { + PrevConditionValues = ConditionValues; + PrevInsertPoint = InsertPoint; + } else { + // Not splitting from the outer. Use the outer bases and insert + // point. Union the bases. + PrevSplitFromOuter = false; + PrevConditionValues = *OuterConditionValues; + PrevConditionValues.insert(ConditionValues.begin(), + ConditionValues.end()); + PrevInsertPoint = OuterInsertPoint; + } + } else { + CHR_DEBUG(dbgs() << "Outer null\n"); + PrevConditionValues = ConditionValues; + PrevInsertPoint = InsertPoint; + } + } else { + CHR_DEBUG(dbgs() << "Should split from prev at " + << RI.R->getNameStr() << "\n"); + if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, + DT, Unhoistables)) { + CHRScope *Tail = Scope->split(RI.R); + Scopes.insert(Tail); + Splits.push_back(Scope); + SplitsSplitFromOuter.push_back(PrevSplitFromOuter); + SplitsConditionValues.push_back(PrevConditionValues); + SplitsInsertPoints.push_back(PrevInsertPoint); + Scope = Tail; + PrevConditionValues = ConditionValues; + PrevInsertPoint = InsertPoint; + PrevSplitFromOuter = true; + } else { + // Not splitting. Union the bases. Keep the hoist point. + PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); + } + } + } + Splits.push_back(Scope); + SplitsSplitFromOuter.push_back(PrevSplitFromOuter); + SplitsConditionValues.push_back(PrevConditionValues); + assert(PrevInsertPoint && "Null PrevInsertPoint"); + SplitsInsertPoints.push_back(PrevInsertPoint); + assert(Splits.size() == SplitsConditionValues.size() && + Splits.size() == SplitsSplitFromOuter.size() && + Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); + for (size_t I = 0; I < Splits.size(); ++I) { + CHRScope *Split = Splits[I]; + DenseSet &SplitConditionValues = SplitsConditionValues[I]; + Instruction *SplitInsertPoint = SplitsInsertPoints[I]; + SmallVector NewSubs; + DenseSet SplitUnhoistables; + GetSelectsInScope(Split, SplitUnhoistables); + for (CHRScope *Sub : Split->Subs) { + SmallVector SubSplits = splitScope( + Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, + SplitUnhoistables); + NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); + } + Split->Subs = NewSubs; + } + SmallVector Result; + for (size_t I = 0; I < Splits.size(); ++I) { + CHRScope *Split = Splits[I]; + if (SplitsSplitFromOuter[I]) { + // Split from the outer. + Output.push_back(Split); + Split->BranchInsertPoint = SplitsInsertPoints[I]; + CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] + << "\n"); + } else { + // Connected to the outer. + Result.push_back(Split); + } + } + if (!Outer) + assert(Result.empty() && + "If no outer (top-level), must return no nested ones"); + return Result; +} + +void CHR::setPerScopeBias(SmallVectorImpl &Scopes) { + for (CHRScope *Scope : Scopes) { + assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); + setPerScopeBias(Scope, Scope); + CHR_DEBUG( + dbgs() << "setPerScopeBias " << *Scope << "\n"; + dbgs() << "TrueBiasedRegions "; + for (Region *R : Scope->TrueBiasedRegions) { + dbgs() << R->getNameStr() << ", "; + } + dbgs() << "\n"; + dbgs() << "FalseBiasedRegions "; + for (Region *R : Scope->FalseBiasedRegions) { + dbgs() << R->getNameStr() << ", "; + } + dbgs() << "\n"; + dbgs() << "TrueBiasedSelects "; + for (SelectInst *SI : Scope->TrueBiasedSelects) { + dbgs() << *SI << ", "; + } + dbgs() << "\n"; + dbgs() << "FalseBiasedSelects "; + for (SelectInst *SI : Scope->FalseBiasedSelects) { + dbgs() << *SI << ", "; + } + dbgs() << "\n";); + } +} + +void CHR::setPerScopeBias(CHRScope *Scope, CHRScope *OutermostScope) { + for (RegInfo &RI : Scope->RegInfos) { + if (RI.HasBranch) { + Region *R = RI.R; + if (TrueBiasedRegionsGlobal.count(R) > 0) + OutermostScope->TrueBiasedRegions.insert(R); + else if (FalseBiasedRegionsGlobal.count(R) > 0) + OutermostScope->FalseBiasedRegions.insert(R); + else + llvm_unreachable("Must be biased"); + } + for (SelectInst *SI : RI.Selects) { + if (TrueBiasedSelectsGlobal.count(SI) > 0) + OutermostScope->TrueBiasedSelects.insert(SI); + else if (FalseBiasedSelectsGlobal.count(SI) > 0) + OutermostScope->FalseBiasedSelects.insert(SI); + else + llvm_unreachable("Must be biased"); + } + } + for (CHRScope *Sub : Scope->Subs) { + setPerScopeBias(Sub, OutermostScope); + } +} + +static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { + unsigned NumBiased = Scope->TrueBiasedRegions.size() + + Scope->FalseBiasedRegions.size() + + Scope->TrueBiasedSelects.size() + + Scope->FalseBiasedSelects.size(); + return NumBiased >= CHRMergeThreshold; +} + +void CHR::filterScopes(SmallVectorImpl &Input, + SmallVectorImpl &Output) { + for (CHRScope *Scope : Input) { + // Filter out the ones with only one region and no subs. + if (!hasAtLeastTwoBiasedBranches(Scope)) { + CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " + << Scope->TrueBiasedRegions.size() + << " falsy-regions " << Scope->FalseBiasedRegions.size() + << " true-selects " << Scope->TrueBiasedSelects.size() + << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); + continue; + } + Output.push_back(Scope); + } +} + +void CHR::setCHRRegions(SmallVectorImpl &Input, + SmallVectorImpl &Output) { + for (CHRScope *Scope : Input) { + assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && + "Empty"); + setCHRRegions(Scope, Scope); + Output.push_back(Scope); + CHR_DEBUG( + dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; + for (auto pair : Scope->HoistStopMap) { + Region *R = pair.first; + dbgs() << "Region " << R->getNameStr() << "\n"; + for (Instruction *I : pair.second) { + dbgs() << "HoistStop " << *I << "\n"; + } + } + dbgs() << "CHRRegions" << "\n"; + for (RegInfo &RI : Scope->CHRRegions) { + dbgs() << RI.R->getNameStr() << "\n"; + }); + } +} + +void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { + DenseSet Unhoistables; + // Put the biased selects in Unhoistables because they should stay where they + // are and constant-folded after CHR (in case one biased select or a branch + // can depend on another biased select.) + for (RegInfo &RI : Scope->RegInfos) { + for (SelectInst *SI : RI.Selects) { + Unhoistables.insert(SI); + } + } + Instruction *InsertPoint = OutermostScope->BranchInsertPoint; + for (RegInfo &RI : Scope->RegInfos) { + Region *R = RI.R; + DenseSet HoistStops; + bool IsHoisted = false; + if (RI.HasBranch) { + assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || + OutermostScope->FalseBiasedRegions.count(R) > 0) && + "Must be truthy or falsy"); + auto *BI = cast(R->getEntry()->getTerminator()); + // Note checkHoistValue fills in HoistStops. + bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, + Unhoistables, &HoistStops); + assert(IsHoistable && "Must be hoistable"); + (void)(IsHoistable); // Unused in release build + IsHoisted = true; + } + for (SelectInst *SI : RI.Selects) { + assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || + OutermostScope->FalseBiasedSelects.count(SI) > 0) && + "Must be true or false biased"); + // Note checkHoistValue fills in HoistStops. + bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, + Unhoistables, &HoistStops); + assert(IsHoistable && "Must be hoistable"); + (void)(IsHoistable); // Unused in release build + IsHoisted = true; + } + if (IsHoisted) { + OutermostScope->CHRRegions.push_back(RI); + OutermostScope->HoistStopMap[R] = HoistStops; + } + } + for (CHRScope *Sub : Scope->Subs) + setCHRRegions(Sub, OutermostScope); +} + +bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { + return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); +} + +void CHR::sortScopes(SmallVectorImpl &Input, + SmallVectorImpl &Output) { + Output.resize(Input.size()); + std::copy(Input.begin(), Input.end(), Output.begin()); + std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); +} + +// Return true if V is already hoisted or was hoisted (along with its operands) +// to the insert point. +static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, + HoistStopMapTy &HoistStopMap, + DenseSet &HoistedSet, + DenseSet &TrivialPHIs) { + auto IT = HoistStopMap.find(R); + assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); + DenseSet &HoistStops = IT->second; + if (auto *I = dyn_cast(V)) { + if (I == HoistPoint) + return; + if (HoistStops.count(I)) + return; + if (auto *PN = dyn_cast(I)) + if (TrivialPHIs.count(PN)) + // The trivial phi inserted by the previous CHR scope could replace a + // non-phi in HoistStops. Note that since this phi is at the exit of a + // previous CHR scope, which dominates this scope, it's safe to stop + // hoisting there. + return; + if (HoistedSet.count(I)) + // Already hoisted, return. + return; + assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); + for (Value *Op : I->operands()) { + hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); + } + I->moveBefore(HoistPoint); + HoistedSet.insert(I); + CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); + } +} + +// Hoist the dependent condition values of the branches and the selects in the +// scope to the insert point. +static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, + DenseSet &TrivialPHIs) { + DenseSet HoistedSet; + for (const RegInfo &RI : Scope->CHRRegions) { + Region *R = RI.R; + bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); + bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); + if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { + auto *BI = cast(R->getEntry()->getTerminator()); + hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, + HoistedSet, TrivialPHIs); + } + for (SelectInst *SI : RI.Selects) { + bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); + bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); + if (!(IsTrueBiased || IsFalseBiased)) + continue; + hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, + HoistedSet, TrivialPHIs); + } + } +} + +// Negate the predicate if an ICmp if it's used only by branches or selects by +// swapping the operands of the branches or the selects. Returns true if success. +static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, + Instruction *ExcludedUser, + CHRScope *Scope) { + for (User *U : ICmp->users()) { + if (U == ExcludedUser) + continue; + if (isa(U) && cast(U)->isConditional()) + continue; + if (isa(U) && cast(U)->getCondition() == ICmp) + continue; + return false; + } + for (User *U : ICmp->users()) { + if (U == ExcludedUser) + continue; + if (auto *BI = dyn_cast(U)) { + assert(BI->isConditional() && "Must be conditional"); + BI->swapSuccessors(); + // Don't need to swap this in terms of + // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based + // mean whehter the branch is likely go into the if-then rather than + // successor0/successor1 and because we can tell which edge is the then or + // the else one by comparing the destination to the region exit block. + continue; + } + if (auto *SI = dyn_cast(U)) { + // Swap operands + Value *TrueValue = SI->getTrueValue(); + Value *FalseValue = SI->getFalseValue(); + SI->setTrueValue(FalseValue); + SI->setFalseValue(TrueValue); + SI->swapProfMetadata(); + if (Scope->TrueBiasedSelects.count(SI)) { + assert(Scope->FalseBiasedSelects.count(SI) == 0 && + "Must not be already in"); + Scope->FalseBiasedSelects.insert(SI); + } else if (Scope->FalseBiasedSelects.count(SI)) { + assert(Scope->TrueBiasedSelects.count(SI) == 0 && + "Must not be already in"); + Scope->TrueBiasedSelects.insert(SI); + } + continue; + } + llvm_unreachable("Must be a branch or a select"); + } + ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); + return true; +} + +// A helper for transformScopes. Insert a trivial phi at the scope exit block +// for a value that's defined in the scope but used outside it (meaning it's +// alive at the exit block). +static void insertTrivialPHIs(CHRScope *Scope, + BasicBlock *EntryBlock, BasicBlock *ExitBlock, + DenseSet &TrivialPHIs) { + DenseSet BlocksInScopeSet; + SmallVector BlocksInScopeVec; + for (RegInfo &RI : Scope->RegInfos) { + for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the + // sub-Scopes. + BlocksInScopeSet.insert(BB); + BlocksInScopeVec.push_back(BB); + } + } + CHR_DEBUG( + dbgs() << "Inserting redudant phis\n"; + for (BasicBlock *BB : BlocksInScopeVec) { + dbgs() << "BlockInScope " << BB->getName() << "\n"; + }); + for (BasicBlock *BB : BlocksInScopeVec) { + for (Instruction &I : *BB) { + SmallVector Users; + for (User *U : I.users()) { + if (auto *UI = dyn_cast(U)) { + if (BlocksInScopeSet.count(UI->getParent()) == 0 && + // Unless there's already a phi for I at the exit block. + !(isa(UI) && UI->getParent() == ExitBlock)) { + CHR_DEBUG(dbgs() << "V " << I << "\n"); + CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); + Users.push_back(UI); + } else if (UI->getParent() == EntryBlock && isa(UI)) { + // There's a loop backedge from a block that's dominated by this + // scope to the entry block. + CHR_DEBUG(dbgs() << "V " << I << "\n"); + CHR_DEBUG(dbgs() + << "Used at entry block (for a back edge) by a phi user " + << *UI << "\n"); + Users.push_back(UI); + } + } + } + if (Users.size() > 0) { + // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at + // ExitBlock. Replace I with the new phi in UI unless UI is another + // phi at ExitBlock. + unsigned PredCount = std::distance(pred_begin(ExitBlock), + pred_end(ExitBlock)); + PHINode *PN = PHINode::Create(I.getType(), PredCount, "", + &ExitBlock->front()); + for (BasicBlock *Pred : predecessors(ExitBlock)) { + PN->addIncoming(&I, Pred); + } + TrivialPHIs.insert(PN); + CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); + for (Instruction *UI : Users) { + for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { + if (UI->getOperand(J) == &I) { + UI->setOperand(J, PN); + } + } + CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); + } + } + } + } +} + +// Assert that all the CHR regions of the scope have a biased branch or select. +static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { +#ifndef NDEBUG + auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { + if (Scope->TrueBiasedRegions.count(RI.R) || + Scope->FalseBiasedRegions.count(RI.R)) + return true; + for (SelectInst *SI : RI.Selects) + if (Scope->TrueBiasedSelects.count(SI) || + Scope->FalseBiasedSelects.count(SI)) + return true; + return false; + }; + for (RegInfo &RI : Scope->CHRRegions) { + assert(HasBiasedBranchOrSelect(RI, Scope) && + "Must have biased branch or select"); + } +#endif +} + +// Assert that all the condition values of the biased branches and selects have +// been hoisted to the pre-entry block or outside of the scope. +static void assertBranchOrSelectConditionHoisted(CHRScope *Scope, + BasicBlock *PreEntryBlock) { + CHR_DEBUG(dbgs() << "Biased regions condition values \n"); + for (RegInfo &RI : Scope->CHRRegions) { + Region *R = RI.R; + bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); + bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); + if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { + auto *BI = cast(R->getEntry()->getTerminator()); + Value *V = BI->getCondition(); + CHR_DEBUG(dbgs() << *V << "\n"); + if (auto *I = dyn_cast(V)) { + assert((I->getParent() == PreEntryBlock || + !Scope->contains(I)) && + "Must have been hoisted to PreEntryBlock or outside the scope"); + } + } + for (SelectInst *SI : RI.Selects) { + bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); + bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); + if (!(IsTrueBiased || IsFalseBiased)) + continue; + Value *V = SI->getCondition(); + CHR_DEBUG(dbgs() << *V << "\n"); + if (auto *I = dyn_cast(V)) { + assert((I->getParent() == PreEntryBlock || + !Scope->contains(I)) && + "Must have been hoisted to PreEntryBlock or outside the scope"); + } + } + } +} + +void CHR::transformScopes(CHRScope *Scope, DenseSet &TrivialPHIs) { + CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); + + assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); + Region *FirstRegion = Scope->RegInfos[0].R; + BasicBlock *EntryBlock = FirstRegion->getEntry(); + Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; + BasicBlock *ExitBlock = LastRegion->getExit(); + + if (ExitBlock) { + // Insert a trivial phi at the exit block (where the CHR hot path and the + // cold path merges) for a value that's defined in the scope but used + // outside it (meaning it's alive at the exit block). We will add the + // incoming values for the CHR cold paths to it below. Without this, we'd + // miss updating phi's for such values unless there happens to already be a + // phi for that value there. + insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); + } + + // Split the entry block of the first region. The new block becomes the new + // entry block of the first region. The old entry block becomes the block to + // insert the CHR branch into. Note DT gets updated. Since DT gets updated + // through the split, we update the entry of the first region after the split, + // and Region only points to the entry and the exit blocks, rather than + // keeping everything in a list or set, the blocks membership and the + // entry/exit blocks of the region are still valid after the split. + CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() + << " at " << *Scope->BranchInsertPoint << "\n"); + BasicBlock *NewEntryBlock = + SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); + assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && + "NewEntryBlock's only pred must be EntryBlock"); + FirstRegion->replaceEntryRecursive(NewEntryBlock); + BasicBlock *PreEntryBlock = EntryBlock; + + ValueToValueMapTy VMap; + // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a + // hot path (originals) and a cold path (clones) and update the PHIs at the + // exit block. + cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); + + // Replace the old (placeholder) branch with the new (merged) conditional + // branch. + BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, + NewEntryBlock, VMap); + +#ifndef NDEBUG + assertCHRRegionsHaveBiasedBranchOrSelect(Scope); +#endif + + // Hoist the conditional values of the branches/selects. + hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); + +#ifndef NDEBUG + assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); +#endif + + // Create the combined branch condition and constant-fold the branches/selects + // in the hot path. + fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr); +} + +// A helper for transformScopes. Clone the blocks in the scope (excluding the +// PreEntryBlock) to split into a hot path and a cold path and update the PHIs +// at the exit block. +void CHR::cloneScopeBlocks(CHRScope *Scope, + BasicBlock *PreEntryBlock, + BasicBlock *ExitBlock, + Region *LastRegion, + ValueToValueMapTy &VMap) { + // Clone all the blocks. The original blocks will be the hot-path + // CHR-optimized code and the cloned blocks will be the original unoptimized + // code. This is so that the block pointers from the + // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code + // which CHR should apply to. + SmallVector NewBlocks; + for (RegInfo &RI : Scope->RegInfos) + for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the + // sub-Scopes. + assert(BB != PreEntryBlock && "Don't copy the preetntry block"); + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); + NewBlocks.push_back(NewBB); + VMap[BB] = NewBB; + } + + // Place the cloned blocks right after the original blocks (right before the + // exit block of.) + if (ExitBlock) + F.getBasicBlockList().splice(ExitBlock->getIterator(), + F.getBasicBlockList(), + NewBlocks[0]->getIterator(), F.end()); + + // Update the cloned blocks/instructions to refer to themselves. + for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) + for (Instruction &I : *NewBlocks[i]) + RemapInstruction(&I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + + // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for + // the top-level region but we don't need to add PHIs. The trivial PHIs + // inserted above will be updated here. + if (ExitBlock) + for (PHINode &PN : ExitBlock->phis()) + for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; + ++I) { + BasicBlock *Pred = PN.getIncomingBlock(I); + if (LastRegion->contains(Pred)) { + Value *V = PN.getIncomingValue(I); + auto It = VMap.find(V); + if (It != VMap.end()) V = It->second; + assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); + PN.addIncoming(V, cast(VMap[Pred])); + } + } +} + +// A helper for transformScope. Replace the old (placeholder) branch with the +// new (merged) conditional branch. +BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, + BasicBlock *EntryBlock, + BasicBlock *NewEntryBlock, + ValueToValueMapTy &VMap) { + BranchInst *OldBR = cast(PreEntryBlock->getTerminator()); + assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && + "SplitBlock did not work correctly!"); + assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && + "NewEntryBlock's only pred must be EntryBlock"); + assert(VMap.find(NewEntryBlock) != VMap.end() && + "NewEntryBlock must have been copied"); + OldBR->removeFromParent(); + OldBR->dropAllReferences(); + BranchInst *NewBR = BranchInst::Create(NewEntryBlock, + cast(VMap[NewEntryBlock]), + ConstantInt::getTrue(F.getContext())); + PreEntryBlock->getInstList().push_back(NewBR); + assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && + "NewEntryBlock's only pred must be EntryBlock"); + return NewBR; +} + +// A helper for transformScopes. Create the combined branch condition and +// constant-fold the branches/selects in the hot path. +void CHR::fixupBranchesAndSelects(CHRScope *Scope, + BasicBlock *PreEntryBlock, + BranchInst *MergedBR) { + Value *MergedCondition = ConstantInt::getTrue(F.getContext()); + double CHRBranchBias = 1.0; + uint64_t NumCHRedBranches = 0; + for (RegInfo &RI : Scope->CHRRegions) { + Region *R = RI.R; + if (RI.HasBranch) { + fixupBranch(R, Scope, PreEntryBlock, MergedCondition, CHRBranchBias); + ++NumCHRedBranches; + } + for (SelectInst *SI : RI.Selects) { + fixupSelect(SI, Scope, PreEntryBlock, MergedCondition, CHRBranchBias); + ++NumCHRedBranches; + } + } + Stats.NumBranchesDelta += NumCHRedBranches - 1; + MergedBR->setCondition(MergedCondition); + SmallVector Weights; + Weights.push_back(static_cast(CHRBranchBias * 1000.0)); + Weights.push_back(static_cast((1.0 - CHRBranchBias) * 1000.0)); + MDBuilder MDB(F.getContext()); + MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] + << "\n"); +} + +// A helper for fixupBranchesAndSelects. Add to the combined branch condition +// and constant-fold a branch in the hot path. +void CHR::fixupBranch(Region *R, CHRScope *Scope, + BasicBlock *PreEntryBlock, + Value *&MergedCondition, double &CHRBranchBias) { + bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); + assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && + "Must be truthy or falsy"); + auto *BI = cast(R->getEntry()->getTerminator()); + assert(BranchBiasMap.find(R) != BranchBiasMap.end() && + "Must be in the bias map"); + double Bias = BranchBiasMap[R]; + assert(Bias >= CHRBiasThreshold && "Must be highly biased"); + CHRBranchBias *= Bias; + BasicBlock *IfThen = BI->getSuccessor(1); + BasicBlock *IfElse = BI->getSuccessor(0); + BasicBlock *RegionExitBlock = R->getExit(); + assert(RegionExitBlock && "Null ExitBlock"); + assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && + IfThen != IfElse && "Invariant from findScopes"); + if (IfThen == RegionExitBlock) { + // Swap them so that IfThen means going into it and IfElse means skipping + // it. + std::swap(IfThen, IfElse); + } + CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() + << " IfElse " << IfElse->getName() << "\n"); + Value *Cond = BI->getCondition(); + BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; + bool ConditionTrue = HotTarget == BI->getSuccessor(0); + addToMergedCondition(ConditionTrue, Cond, BI, Scope, PreEntryBlock, + MergedCondition); + // Constant-fold the branch at ClonedEntryBlock. + assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && + "The successor shouldn't change"); + Value *NewCondition = ConditionTrue ? + ConstantInt::getTrue(F.getContext()) : + ConstantInt::getFalse(F.getContext()); + BI->setCondition(NewCondition); +} + +// A helper for fixupBranchesAndSelects. Add to the combined branch condition +// and constant-fold a select in the hot path. +void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, + BasicBlock *PreEntryBlock, + Value *&MergedCondition, double &CHRBranchBias) { + bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); + assert((IsTrueBiased || + Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); + assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && + "Must be in the bias map"); + double Bias = SelectBiasMap[SI]; + assert(Bias >= CHRBiasThreshold && "Must be highly biased"); + CHRBranchBias *= Bias; + Value *Cond = SI->getCondition(); + addToMergedCondition(IsTrueBiased, Cond, SI, Scope, PreEntryBlock, + MergedCondition); + Value *NewCondition = IsTrueBiased ? + ConstantInt::getTrue(F.getContext()) : + ConstantInt::getFalse(F.getContext()); + SI->setCondition(NewCondition); +} + +// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged +// condition. +void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, + Instruction *BranchOrSelect, + CHRScope *Scope, + BasicBlock *PreEntryBlock, + Value *&MergedCondition) { + if (IsTrueBiased) { + Instruction *And = BinaryOperator::Create(Instruction::And, + MergedCondition, Cond); + PreEntryBlock->getInstList().insert( + PreEntryBlock->getTerminator()->getIterator(), And); + MergedCondition = And; + } else { + // If Cond is an icmp and all users of V except for BranchOrSelect is a + // branch, negate the icmp predicate and swap the branch targets and avoid + // inserting an Xor to negate Cond. + bool Done = false; + if (auto *ICmp = dyn_cast(Cond)) + if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { + Instruction *And = BinaryOperator::Create(Instruction::And, + MergedCondition, Cond); + PreEntryBlock->getInstList().insert( + PreEntryBlock->getTerminator()->getIterator(), And); + MergedCondition = And; + Done = true; + } + if (!Done) { + Instruction *Negate = + BinaryOperator::Create(Instruction::Xor, + ConstantInt::getTrue(F.getContext()), Cond); + Instruction *And = + BinaryOperator::Create(Instruction::And, MergedCondition, Negate); + PreEntryBlock->getInstList().insert( + PreEntryBlock->getTerminator()->getIterator(), Negate); + PreEntryBlock->getInstList().insert( + PreEntryBlock->getTerminator()->getIterator(), And); + MergedCondition = And; + } + } +} + +void CHR::transformScopes(SmallVectorImpl &CHRScopes) { + unsigned i = 0; + (void)(i); // Unused in release build. + DenseSet TrivialPHIs; + for (CHRScope *Scope : CHRScopes) { + transformScopes(Scope, TrivialPHIs); + CHR_DEBUG( + std::ostringstream oss; + oss << " after transformScopes " << i++; + dumpIR(F, oss.str().c_str(), nullptr)); + } +} + +static void dumpScopes(SmallVectorImpl &Scopes, const char * Label) { + dbgs() << Label << " " << Scopes.size() << "\n"; + for (CHRScope *Scope : Scopes) { + dbgs() << *Scope << "\n"; + } +} + +bool CHR::run() { + if (!shouldApply(F, PSI)) + return false; + + CHR_DEBUG(dumpIR(F, "before", nullptr)); + + bool Changed = false; + { + CHR_DEBUG( + dbgs() << "RegionInfo:\n"; + RI.print(dbgs())); + + // Recursively traverse the region tree and find regions that have biased + // branches and/or selects and create scopes. + SmallVector AllScopes; + findScopes(AllScopes); + CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); + + // Split the scopes if 1) the conditiona values of the biased + // branches/selects of the inner/lower scope can't be hoisted up to the + // outermost/uppermost scope entry, or 2) the condition values of the biased + // branches/selects in a scope (including subscopes) don't share at least + // one common value. + SmallVector SplitScopes; + splitScopes(AllScopes, SplitScopes); + CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); + + // After splitting, set the biased regions and selects of a scope (a tree + // root) that include those of the subscopes. + setPerScopeBias(SplitScopes); + CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); + + // Filter out the scopes that has only one biased region or select (CHR + // isn't useful in such a case). + SmallVector FilteredScopes; + filterScopes(SplitScopes, FilteredScopes); + CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); + + // Set the regions to be CHR'ed and their hoist stops for each scope. + SmallVector SetScopes; + setCHRRegions(FilteredScopes, SetScopes); + CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); + + // Sort CHRScopes by the depth so that outer CHRScopes comes before inner + // ones. We need to apply CHR from outer to inner so that we apply CHR only + // to the hot path, rather than both hot and cold paths. + SmallVector SortedScopes; + sortScopes(SetScopes, SortedScopes); + CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); + + CHR_DEBUG( + dbgs() << "RegionInfo:\n"; + RI.print(dbgs())); + + // Apply the CHR transformation. + if (!SortedScopes.empty()) { + transformScopes(SortedScopes); + Changed = true; + } + } + + if (Changed) + CHR_DEBUG(dumpIR(F, "after", &Stats)); + + return Changed; +} + +bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { + DominatorTree &DT = getAnalysis().getDomTree(); + ProfileSummaryInfo &PSI = + *getAnalysis().getPSI(); + RegionInfo &RI = getAnalysis().getRegionInfo(); + return CHR(F, DT, PSI, RI).run(); +} + +namespace llvm { + +ControlHeightReductionPass::ControlHeightReductionPass() { + ParseCHRFilterFiles(); +} + +PreservedAnalyses ControlHeightReductionPass::run( + Function &F, + FunctionAnalysisManager &FAM) { + auto &DT = FAM.getResult(F); + auto &MAMProxy = FAM.getResult(F); + auto &MAM = MAMProxy.getManager(); + auto &PSI = *MAM.getCachedResult(*F.getParent()); + auto &RI = FAM.getResult(F); + bool Changed = CHR(F, DT, PSI, RI).run(); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = PreservedAnalyses(); + PA.preserve(); + return PA; +} + +} // namespace llvm Index: lib/Transforms/Instrumentation/Instrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/Instrumentation.cpp +++ lib/Transforms/Instrumentation/Instrumentation.cpp @@ -59,6 +59,7 @@ initializeAddressSanitizerPass(Registry); initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); + initializeControlHeightReductionLegacyPassPass(Registry); initializeGCOVProfilerLegacyPassPass(Registry); initializePGOInstrumentationGenLegacyPassPass(Registry); initializePGOInstrumentationUseLegacyPassPass(Registry); Index: test/Transforms/PGOProfile/chr.ll =================================================================== --- /dev/null +++ test/Transforms/PGOProfile/chr.ll @@ -0,0 +1,1904 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -chr -instcombine -simplifycfg -S | FileCheck %s +; RUN: opt < %s -passes='require,function(chr,instcombine,simplify-cfg)' -S | FileCheck %s + +declare void @foo() +declare void @bar() + +; Simple case. +; Roughly, +; t0 = *i +; if ((t0 & 1) != 0) // Likely true +; foo() +; if ((t0 & 2) != 0) // Likely true +; foo() +; -> +; t0 = *i +; if ((t0 & 3) != 0) { // Likely true +; foo() +; foo() +; } else { +; if ((t0 & 1) != 0) +; foo() +; if ((t0 & 2) != 0) +; foo() +; } +define void @test_chr_1(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %3 = and i32 %0, 2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb3, label %bb2, !prof !15 + +bb2: + call void @foo() + br label %bb3 + +bb3: + ret void +} + +; Simple case with a cold block. +; Roughly, +; t0 = *i +; if ((t0 & 1) != 0) // Likely true +; foo() +; if ((t0 & 2) == 0) // Likely false +; bar() +; if ((t0 & 4) != 0) // Likely true +; foo() +; -> +; t0 = *i +; if ((t0 & 7) == 7) { // Likely true +; foo() +; foo() +; } else { +; if ((t0 & 1) != 0) +; foo() +; if ((t0 & 2) != 0) +; bar() +; if ((t0 & 4) != 0) +; foo() +; } +define void @test_chr_1_1(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_1_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 7 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB3_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[BB3_NONCHR]] +; CHECK: bb3.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[BB5]], label [[BB4_NONCHR:%.*]], !prof !16 +; CHECK: bb4.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %3 = and i32 %0, 2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb2, label %bb3, !prof !15 + +bb2: + call void @bar() + br label %bb3 + +bb3: + %5 = and i32 %0, 4 + %6 = icmp eq i32 %5, 0 + br i1 %6, label %bb5, label %bb4, !prof !15 + +bb4: + call void @foo() + br label %bb5 + +bb5: + ret void +} + +; With an aggregate bit check. +; Roughly, +; t0 = *i +; if ((t0 & 255) != 0) // Likely true +; if ((t0 & 1) != 0) // Likely true +; foo() +; if ((t0 & 2) != 0) // Likely true +; foo() +; -> +; t0 = *i +; if ((t0 & 3) != 0) { // Likely true +; foo() +; foo() +; } else if ((t0 & 255) != 0) +; if ((t0 & 1) != 0) +; foo() +; if ((t0 & 2) != 0) +; foo() +; } +define void @test_chr_2(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 255 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB4]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB2_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[BB4]], label [[BB3_NONCHR:%.*]], !prof !16 +; CHECK: bb3.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB2_NONCHR]] +; CHECK: bb4: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 255 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb4, label %bb0, !prof !15 + +bb0: + %3 = and i32 %0, 1 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb2, label %bb1, !prof !15 + +bb1: + call void @foo() + br label %bb2 + +bb2: + %5 = and i32 %0, 2 + %6 = icmp eq i32 %5, 0 + br i1 %6, label %bb4, label %bb3, !prof !15 + +bb3: + call void @foo() + br label %bb4 + +bb4: + ret void +} + +; Split case. +; Roughly, +; t1 = *i +; if ((t1 & 1) != 0) // Likely true +; foo() +; if ((t1 & 2) != 0) // Likely true +; foo() +; t2 = *i +; if ((t2 & 4) != 0) // Likely true +; foo() +; if ((t2 & 8) != 0) // Likely true +; foo() +; -> +; t1 = *i +; if ((t1 & 3) != 0) { // Likely true +; foo() +; foo() +; } else { +; if ((t1 & 1) != 0) +; foo() +; if ((t1 & 2) != 0) +; foo() +; } +; t2 = *i +; if ((t2 & 12) != 0) { // Likely true +; foo() +; foo() +; } else { +; if ((t2 & 4) != 0) +; foo() +; if ((t2 & 8) != 0) +; foo() +; } +define void @test_chr_3(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 12 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 12 +; CHECK-NEXT: br i1 [[TMP9]], label [[BB4:%.*]], label [[BB3_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb4: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB7:%.*]] +; CHECK: bb3.split.nonchr: +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP7]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: br i1 [[TMP11]], label [[BB5_NONCHR:%.*]], label [[BB4_NONCHR:%.*]], !prof !16 +; CHECK: bb4.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB5_NONCHR]] +; CHECK: bb5.nonchr: +; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP7]], 8 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 0 +; CHECK-NEXT: br i1 [[TMP13]], label [[BB7]], label [[BB6_NONCHR:%.*]], !prof !16 +; CHECK: bb6.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB7]] +; CHECK: bb7: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %3 = and i32 %0, 2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb3, label %bb2, !prof !15 + +bb2: + call void @foo() + br label %bb3 + +bb3: + %5 = load i32, i32* %i + %6 = and i32 %5, 4 + %7 = icmp eq i32 %6, 0 + br i1 %7, label %bb5, label %bb4, !prof !15 + +bb4: + call void @foo() + br label %bb5 + +bb5: + %8 = and i32 %5, 8 + %9 = icmp eq i32 %8, 0 + br i1 %9, label %bb7, label %bb6, !prof !15 + +bb6: + call void @foo() + br label %bb7 + +bb7: + ret void +} + +; Selects. +; Roughly, +; t0 = *i +; sum1 = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false +; sum2 = (t0 & 2) ? sum1 : (sum1 + 43) // Likely false +; return sum2 +; -> +; t0 = *i +; if ((t0 & 3) == 3) +; return sum0 + 85 +; else { +; sum1 = (t0 & 1) ? sum0 : (sum0 + 42) +; sum2 = (t0 & 2) ? sum1 : (sum1 + 43) +; return sum2 +; } +define i32 @test_chr_4(i32* %i, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: entry.split: +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85 +; CHECK-NEXT: ret i32 [[TMP3]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 42 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP6]], i32 [[SUM0]], i32 [[TMP4]], !prof !16 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM1_NONCHR]], 43 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM1_NONCHR]], i32 [[TMP9]], !prof !16 +; CHECK-NEXT: ret i32 [[SUM2_NONCHR]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + %3 = add i32 %sum0, 42 + %sum1 = select i1 %2, i32 %sum0, i32 %3, !prof !15 + %4 = and i32 %0, 2 + %5 = icmp eq i32 %4, 0 + %6 = add i32 %sum1, 43 + %sum2 = select i1 %5, i32 %sum1, i32 %6, !prof !15 + ret i32 %sum2 +} + +; Selects + Brs +; Roughly, +; t0 = *i +; if ((t0 & 255) != 0) { // Likely true +; sum = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false +; sum = (t0 & 2) ? sum : (sum + 43) // Likely false +; if ((t0 & 4) != 0) { // Likely true +; sum3 = sum + 44 +; sum = (t0 & 8) ? sum3 : (sum3 + 44) // Likely false +; } +; } +; return sum +; -> +; t0 = *i +; if ((t0 & 15) != 15) { // Likely true +; sum = sum0 + 173 +; } else if ((t0 & 255) != 0) { +; sum = (t0 & 1) ? sum0 : (sum0 + 42) +; sum = (t0 & 2) ? sum : (sum + 43) +; if ((t0 & 4) != 0) { +; sum3 = sum + 44 +; sum = (t0 & 8) ? sum3 : (sum3 + 44) +; } +; } +; return sum +define i32 @test_chr_5(i32* %i, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 15 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 15 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM0]], 42 +; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[SUM1_NONCHR]], 43 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM1_NONCHR]], i32 [[TMP12]], !prof !16 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[TMP14]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: ret i32 [[SUM6]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 255 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb3, label %bb0, !prof !15 + +bb0: + %3 = and i32 %0, 1 + %4 = icmp eq i32 %3, 0 + %5 = add i32 %sum0, 42 + %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15 + %6 = and i32 %0, 2 + %7 = icmp eq i32 %6, 0 + %8 = add i32 %sum1, 43 + %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15 + %9 = and i32 %0, 4 + %10 = icmp eq i32 %9, 0 + br i1 %10, label %bb2, label %bb1, !prof !15 + +bb1: + %sum3 = add i32 %sum2, 44 + %11 = and i32 %0, 8 + %12 = icmp eq i32 %11, 0 + %13 = add i32 %sum3, 44 + %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15 + br label %bb2 + +bb2: + %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] + br label %bb3 + +bb3: + %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] + ret i32 %sum6 +} + +; Selects + Brs with a scope split in the middle +; Roughly, +; t0 = *i +; if ((t0 & 255) != 0) { // Likely true +; sum = (t0 & 1) ? sum0 : (sum0 + 42) // Likely false +; sum = (t0 & 2) ? sum : (sum + 43) // Likely false +; if ((sum0 & 4) != 0) { // Likely true. The condition doesn't use v. +; sum3 = sum + 44 +; sum = (t0 & 8) ? sum3 : (sum3 + 44) // Likely false +; } +; } +; return sum +; -> +; t0 = *i +; if ((sum0 & 4) != 0 && (t0 & 11) != 11) { // Likely true +; sum = sum0 + 173 +; } else if ((t0 & 255) != 0) { +; sum = (t0 & 1) ? sum0 : (sum0 + 42) +; sum = (t0 & 2) ? sum : (sum + 43) +; if ((sum0 & 4) != 0) { +; sum3 = sum + 44 +; sum = (t0 & 8) ? sum3 : (sum3 + 44) +; } +; } +; return sum +define i32 @test_chr_5_1(i32* %i, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_5_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUM0:%.*]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 11 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 11 +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP5]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SUM0]], 85 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SUM0]], 173 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP0]], 255 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[SUM0]], 42 +; CHECK-NEXT: [[SUM1_NONCHR:%.*]] = select i1 [[TMP11]], i32 [[SUM0]], i32 [[TMP12]], !prof !16 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SUM1_NONCHR]], 43 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP14]], i32 [[SUM1_NONCHR]], i32 [[TMP15]], !prof !16 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[SUM0]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 0 +; CHECK-NEXT: br i1 [[TMP17]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP19]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP7]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: ret i32 [[SUM6]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 255 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb3, label %bb0, !prof !15 + +bb0: + %3 = and i32 %0, 1 + %4 = icmp eq i32 %3, 0 + %5 = add i32 %sum0, 42 + %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15 + %6 = and i32 %0, 2 + %7 = icmp eq i32 %6, 0 + %8 = add i32 %sum1, 43 + %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15 + %9 = and i32 %sum0, 4 ; Split + %10 = icmp eq i32 %9, 0 + br i1 %10, label %bb2, label %bb1, !prof !15 + +bb1: + %sum3 = add i32 %sum2, 44 + %11 = and i32 %0, 8 + %12 = icmp eq i32 %11, 0 + %13 = add i32 %sum3, 44 + %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15 + br label %bb2 + +bb2: + %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] + br label %bb3 + +bb3: + %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] + ret i32 %sum6 +} + +; Selects + Brs, non-matching bases +; Roughly, +; i0 = *i +; j0 = *j +; if ((i0 & 255) != 0) { // Likely true +; sum = (i0 & 2) ? sum0 : (sum0 + 43) // Likely false +; if ((j0 & 4) != 0) { // Likely true. The condition uses j0, not i0. +; sum3 = sum + 44 +; sum = (i0 & 8) ? sum3 : (sum3 + 44) // Likely false +; } +; } +; return sum +; -> +; i0 = *i +; j0 = *j +; if ((j0 & 4) != 0 && (i0 & 10) != 10) { // Likely true +; sum = sum0 + 131 +; } else if ((i0 & 255) != 0) { +; sum = (i0 & 2) ? sum0 : (sum0 + 43) +; if ((j0 & 4) != 0) { +; sum3 = sum + 44 +; sum = (i0 & 8) ? sum3 : (sum3 + 44) +; } +; } +; return sum +define i32 @test_chr_6(i32* %i, i32* %j, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_6( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4 +; CHECK-NEXT: [[V10:%.*]] = icmp ne i32 [[V9]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 10 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 10 +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[V10]] +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[V13:%.*]] = add i32 [[SUM0]], 131 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[V1:%.*]] = and i32 [[I0]], 255 +; CHECK-NEXT: [[V2:%.*]] = icmp eq i32 [[V1]], 0 +; CHECK-NEXT: br i1 [[V2]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4_NONCHR:%.*]] = icmp eq i32 [[V3_NONCHR]], 0 +; CHECK-NEXT: [[V8_NONCHR:%.*]] = add i32 [[SUM0]], 43 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4_NONCHR]], i32 [[SUM0]], i32 [[V8_NONCHR]], !prof !16 +; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[J0]], 4 +; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0 +; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[I0]], 8 +; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[V12_NONCHR]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[V13]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: ret i32 [[SUM6]] +; +entry: + %i0 = load i32, i32* %i + %j0 = load i32, i32* %j + %v1 = and i32 %i0, 255 + %v2 = icmp eq i32 %v1, 0 + br i1 %v2, label %bb3, label %bb0, !prof !15 + +bb0: + %v3 = and i32 %i0, 2 + %v4 = icmp eq i32 %v3, 0 + %v8 = add i32 %sum0, 43 + %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 + %v9 = and i32 %j0, 4 + %v10 = icmp eq i32 %v9, 0 + br i1 %v10, label %bb2, label %bb1, !prof !15 + +bb1: + %sum3 = add i32 %sum2, 44 + %v11 = and i32 %i0, 8 + %v12 = icmp eq i32 %v11, 0 + %v13 = add i32 %sum3, 44 + %sum4 = select i1 %v12, i32 %sum3, i32 %v13, !prof !15 + br label %bb2 + +bb2: + %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] + br label %bb3 + +bb3: + %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] + ret i32 %sum6 +} + +; Selects + Brs, the branch condition can't be hoisted to be merged with a +; select. No CHR happens. +; Roughly, +; i0 = *i +; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false +; foo(); +; j0 = *j +; if ((j0 & 4) != 0) { // Likely true +; foo(); +; sum = sum + 44 +; } +; return sum +; -> +; (no change) +define i32 @test_chr_7(i32* %i, i32* %j, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4 +; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0 +; CHECK-NEXT: br i1 [[V10]], label [[BB2:%.*]], label [[BB1:%.*]], !prof !16 +; CHECK: bb1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[SUM4:%.*]] = add i32 [[SUM2]], 44 +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[SUM5:%.*]] = phi i32 [ [[SUM2]], [[ENTRY:%.*]] ], [ [[SUM4]], [[BB1]] ] +; CHECK-NEXT: ret i32 [[SUM5]] +; +entry: + %i0 = load i32, i32* %i + %v3 = and i32 %i0, 2 + %v4 = icmp eq i32 %v3, 0 + %v8 = add i32 %sum0, 43 + %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 + call void @foo() + %j0 = load i32, i32* %j + %v9 = and i32 %j0, 4 + %v10 = icmp eq i32 %v9, 0 + br i1 %v10, label %bb2, label %bb1, !prof !15 ; %v10 can't be hoisted above the above select + +bb1: + call void @foo() + %sum4 = add i32 %sum2, 44 + br label %bb2 + +bb2: + %sum5 = phi i32 [ %sum2, %entry ], [ %sum4, %bb1 ] + ret i32 %sum5 +} + +; Selects + Brs, the branch condition can't be hoisted to be merged with the +; selects. Dropping the select. +; Roughly, +; i0 = *i +; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false +; foo(); +; j0 = *j +; if ((j0 & 4) != 0) // Likely true +; foo() +; if ((j0 & 8) != 0) // Likely true +; foo() +; return sum +; -> +; i0 = *i +; sum = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false +; foo(); +; j0 = *j +; if ((j0 & 12) != 12) { // Likely true +; foo() +; foo() +; } else { +; if ((j0 & 4) != 0) +; foo() +; if ((j0 & 8) != 0) +; foo() +; } +; return sum +define i32 @test_chr_7_1(i32* %i, i32* %j, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_7_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[J0]], 12 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 12 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[V9:%.*]] = and i32 [[J0]], 4 +; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0 +; CHECK-NEXT: br i1 [[V10]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[V11_NONCHR:%.*]] = and i32 [[J0]], 8 +; CHECK-NEXT: [[V12_NONCHR:%.*]] = icmp eq i32 [[V11_NONCHR]], 0 +; CHECK-NEXT: br i1 [[V12_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 [[SUM2]] +; +entry: + %i0 = load i32, i32* %i + %v3 = and i32 %i0, 2 + %v4 = icmp eq i32 %v3, 0 + %v8 = add i32 %sum0, 43 + %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 + call void @foo() + %j0 = load i32, i32* %j + %v9 = and i32 %j0, 4 + %v10 = icmp eq i32 %v9, 0 + br i1 %v10, label %bb1, label %bb0, !prof !15 ; %v10 can't be hoisted above the above select + +bb0: + call void @foo() + br label %bb1 + +bb1: + %v11 = and i32 %j0, 8 + %v12 = icmp eq i32 %v11, 0 + br i1 %v12, label %bb3, label %bb2, !prof !15 + +bb2: + call void @foo() + br label %bb3 + +bb3: + ret i32 %sum2 +} + +; Branches aren't biased enough. No CHR happens. +; Roughly, +; t0 = *i +; if ((t0 & 1) != 0) // Not biased +; foo() +; if ((t0 & 2) != 0) // Not biased +; foo() +; -> +; (no change) +define void @test_chr_8(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !17 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !17 +; CHECK: bb2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !16 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %3 = and i32 %0, 2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb3, label %bb2, !prof !16 + +bb2: + call void @foo() + br label %bb3 + +bb3: + ret void +} + +; With an existing phi at the exit. +; Roughly, +; t = *i +; if ((t0 & 1) != 0) // Likely true +; foo() +; if ((t0 & 2) != 0) { // Likely true +; t = *j +; foo() +; } +; // There's a phi for t here. +; return t +; -> +; t = *i +; if ((t & 3) == 3) { // Likely true +; foo() +; t = *j +; foo() +; } else { +; if ((t & 1) != 0) +; foo() +; if ((t & 2) != 0) { +; t = *j +; foo() +; } +; } +; // There's a phi for t here. +; return t +define i32 @test_chr_9(i32* %i, i32* %j) !prof !14 { +; CHECK-LABEL: @test_chr_9( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[J]], align 4 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP0]], [[BB1_NONCHR]] ], [ [[TMP8]], [[BB2_NONCHR]] ] +; CHECK-NEXT: ret i32 [[TMP9]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %3 = and i32 %0, 2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb3, label %bb2, !prof !15 + +bb2: + %5 = load i32, i32* %j + call void @foo() + br label %bb3 + +bb3: + %6 = phi i32 [ %0, %bb1 ], [ %5, %bb2 ] + ret i32 %6 +} + +; With no phi at the exit, but the exit needs a phi inserted after CHR. +; Roughly, +; t0 = *i +; if ((t0 & 1) != 0) // Likely true +; foo() +; t1 = *j +; if ((t1 & 2) != 0) // Likely true +; foo() +; return (t1 * 42) - (t1 - 99) +; -> +; t0 = *i +; if ((t0 & 3) == 3) { // Likely true +; foo() +; t1 = *j +; foo() +; } else { +; if ((t0 & 1) != 0) +; foo() +; if ((t0 & 2) != 0) { +; t1 = *j +; foo() +; } +; } +; // A new phi for t1 is inserted here. +; return (t1 * 42) - (t1 - 99) +define i32 @test_chr_10(i32* %i, i32* %j) !prof !14 { +; CHECK-LABEL: @test_chr_10( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[J]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP6]], [[BB2_NONCHR]] ], [ [[TMP6]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 42 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP9]], -99 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: ret i32 [[TMP12]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %3 = load i32, i32* %j + %4 = and i32 %0, 2 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %bb3, label %bb2, !prof !15 + +bb2: + call void @foo() + br label %bb3 + +bb3: + %6 = mul i32 %3, 42 + %7 = sub i32 %3, 99 + %8 = add i32 %6, %7 + ret i32 %8 +} + +; Hoisting a branch condition value whose dependencies reach the same value in +; two paths and may cause a bad hoisting that hoists it twice and puts a use +; ahead of its definition. +; Roughly, +; t0 = *i +; if ((t0 & 1) != 0) // Likely true +; foo() +; // there are two use-def paths from the branch condition to t0. +; if ((1.0 / t0) * t0 < 1) // Likely true +; foo() +; -> +; t0 = *i +; if ((t0 & 1) != 0 && (1.0 / t0) * t0 > 0) { // Likely true +; foo() +; foo() +; } else { +; if ((t0 & 1) != 0) +; foo() +; if ((1.0 / t0) * t0 < 1) // Likely true +; foo() +; } +define void @test_chr_11(i32* %i, i32 %x) !prof !14 { +; CHECK-LABEL: @test_chr_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double +; CHECK-NEXT: [[DIV:%.*]] = fdiv double 1.000000e+00, [[CONV]] +; CHECK-NEXT: [[MUL16:%.*]] = fmul double [[DIV]], [[CONV]] +; CHECK-NEXT: [[CONV717:%.*]] = fptosi double [[MUL16]] to i32 +; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i32 [[CONV717]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[CMP18]] +; CHECK-NEXT: br i1 [[TMP3]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0_NONCHR:%.*]], label [[BB1_NONCHR:%.*]], !prof !18 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[CONV_NONCHR:%.*]] = sitofp i32 [[TMP0]] to double +; CHECK-NEXT: [[DIV_NONCHR:%.*]] = fdiv double 1.000000e+00, [[CONV_NONCHR]] +; CHECK-NEXT: [[MUL16_NONCHR:%.*]] = fmul double [[DIV_NONCHR]], [[CONV_NONCHR]] +; CHECK-NEXT: [[CONV717_NONCHR:%.*]] = fptosi double [[MUL16_NONCHR]] to i32 +; CHECK-NEXT: [[CMP18_NONCHR:%.*]] = icmp slt i32 [[CONV717_NONCHR]], 1 +; CHECK-NEXT: br i1 [[CMP18_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %conv = sitofp i32 %0 to double + %div = fdiv double 1.000000e+00, %conv + %mul16 = fmul double %div, %conv + %conv717 = fptosi double %mul16 to i32 + %cmp18 = icmp slt i32 %conv717, 1 + br i1 %cmp18, label %bb3, label %bb2, !prof !15 + +bb2: + call void @foo() + br label %bb3 + +bb3: + ret void +} + +; Selects + unrelated br only +define i32 @test_chr_12(i32* %i, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_12( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 255 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB3:%.*]], label [[BB0:%.*]], !prof !16 +; CHECK: bb0: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SUM0:%.*]], 42 +; CHECK-NEXT: [[SUM1:%.*]] = select i1 [[TMP4]], i32 [[SUM0]], i32 [[TMP5]], !prof !16 +; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SUM1]], 43 +; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[TMP7]], i32 [[SUM1]], i32 [[TMP8]], !prof !16 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP10]], [[TMP12]] +; CHECK-NEXT: br i1 [[TMP13]], label [[BB1:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb1: +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[SUM2]], 88 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb0.split.nonchr: +; CHECK-NEXT: br i1 [[TMP10]], label [[BB1_NONCHR:%.*]], label [[BB3]], !prof !18 +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP16]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[SUM0]], [[ENTRY:%.*]] ], [ [[TMP14]], [[BB1]] ], [ [[SUM2]], [[BB0_SPLIT_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: ret i32 [[SUM6]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 255 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb3, label %bb0, !prof !15 + +bb0: + %3 = and i32 %0, 1 + %4 = icmp eq i32 %3, 0 + %5 = add i32 %sum0, 42 + %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15 + %6 = and i32 %0, 2 + %7 = icmp eq i32 %6, 0 + %8 = add i32 %sum1, 43 + %sum2 = select i1 %7, i32 %sum1, i32 %8, !prof !15 + %9 = load i32, i32* %i + %10 = icmp eq i32 %9, 0 + br i1 %10, label %bb2, label %bb1, !prof !15 + +bb1: + %sum3 = add i32 %sum2, 44 + %11 = and i32 %0, 8 + %12 = icmp eq i32 %11, 0 + %13 = add i32 %sum3, 44 + %sum4 = select i1 %12, i32 %sum3, i32 %13, !prof !15 + br label %bb2 + +bb2: + %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] + br label %bb3 + +bb3: + %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] + ret i32 %sum6 +} + +; In the second CHR, a condition value depends on a trivial phi that's inserted +; by the first CHR. +; Roughly, +; i0 = *i +; v2 = (z != 1) ? pred : true // Likely false +; if (z == 0 && pred) // Likely false +; foo() +; j0 = *j +; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false +; sum3 = ((i0 == j0) ? sum0 : (sum0 + 43) // Likely false +; foo() +; if ((i0 & 4) == 0) // Unbiased +; foo() +; return i0 + sum3 +; -> +; i0 = *i +; if (z != 1 && (z == 0 && pred)) // First CHR +; foo() +; // A trivial phi for i0 is inserted here by the first CHR (which gets removed +; // later) and the subsequent branch condition (for the second CHR) uses it. +; j0 = *j +; if ((i0 & 2) != j0 && i0 != j0) { // Second CHR +; sum3 = sum0 + 43 +; foo() +; if (i0 & 4) == 0) +; foo() +; } else { +; sum3 = (i0 == j0) ? sum0 : (sum0 + 43) +; foo() +; if (i0 & 4) == 0) +; foo() +; } +; return i0 + sum3 +define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 { +; CHECK-LABEL: @test_chr_14( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1 +; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0 +; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]] +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp ne i32 [[V6]], [[J0]] +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[V5:%.*]] = icmp ne i32 [[I0]], [[J0]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[V4]], [[V5]] +; CHECK-NEXT: br i1 [[TMP0]], label [[BB1_SPLIT:%.*]], label [[BB1_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb1.split: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4 +; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0 +; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb1.split.nonchr: +; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[I0]], [[J0]] +; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM0]], i32 [[V8]], !prof !16 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[V9_NONCHR:%.*]] = and i32 [[I0]], 4 +; CHECK-NEXT: [[V10_NONCHR:%.*]] = icmp eq i32 [[V9_NONCHR]], 0 +; CHECK-NEXT: br i1 [[V10_NONCHR]], label [[BB3]], label [[BB2_NONCHR:%.*]] +; CHECK: bb2.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[V8]], [[BB2]] ], [ [[V8]], [[BB1_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB2_NONCHR]] ], [ [[SUM3_NONCHR]], [[BB1_SPLIT_NONCHR]] ] +; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[TMP1]] +; CHECK-NEXT: ret i32 [[V11]] +; +entry: + %i0 = load i32, i32* %i + %v0 = icmp eq i32 %z, 0 + %v1 = icmp ne i32 %z, 1 + %v2 = select i1 %v1, i1 %pred, i1 true, !prof !15 + %v3 = and i1 %v0, %pred + br i1 %v3, label %bb0, label %bb1, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %j0 = load i32, i32* %j + %v6 = and i32 %i0, 2 + %v4 = icmp eq i32 %v6, %j0 + %v8 = add i32 %sum0, 43 + %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 + %v5 = icmp eq i32 %i0, %j0 + %sum3 = select i1 %v5, i32 %sum0, i32 %v8, !prof !15 + call void @foo() + %v9 = and i32 %i0, 4 + %v10 = icmp eq i32 %v9, 0 + br i1 %v10, label %bb3, label %bb2 + +bb2: + call void @foo() + br label %bb3 + +bb3: + %v11 = add i32 %i0, %sum3 + ret i32 %v11 +} + +; Branch or selects depends on another select. No CHR happens. +; Roughly, +; i0 = *i +; if (z == 0 && ((z != 1) ? pred : true)) { // Likely false +; foo() +; j0 = *j +; sum2 = ((i0 & 2) == j0) ? sum0 : (sum0 + 43) // Likely false +; sum3 = (i0 == sum2) ? sum2 : (sum0 + 43) // Likely false. This depends on the +; // previous select. +; foo() +; if ((i0 & 4) == 0) // Unbiased +; foo() +; return i0 + sum3 +; -> +; (no change) +define i32 @test_chr_15(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14 { +; CHECK-LABEL: @test_chr_15( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z:%.*]], 0 +; CHECK-NEXT: [[V3:%.*]] = and i1 [[V0]], [[PRED:%.*]] +; CHECK-NEXT: br i1 [[V3]], label [[BB0:%.*]], label [[BB1:%.*]], !prof !16 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[J0:%.*]] = load i32, i32* [[J:%.*]], align 4 +; CHECK-NEXT: [[V6:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V6]], [[J0]] +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0:%.*]], 43 +; CHECK-NEXT: [[SUM2:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 +; CHECK-NEXT: [[V5:%.*]] = icmp eq i32 [[I0]], [[SUM2]] +; CHECK-NEXT: [[SUM3:%.*]] = select i1 [[V5]], i32 [[SUM2]], i32 [[V8]], !prof !16 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[V9:%.*]] = and i32 [[I0]], 4 +; CHECK-NEXT: [[V10:%.*]] = icmp eq i32 [[V9]], 0 +; CHECK-NEXT: br i1 [[V10]], label [[BB3:%.*]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[V11:%.*]] = add i32 [[I0]], [[SUM3]] +; CHECK-NEXT: ret i32 [[V11]] +; +entry: + %i0 = load i32, i32* %i + %v0 = icmp eq i32 %z, 0 + %v1 = icmp ne i32 %z, 1 + %v2 = select i1 %v1, i1 %pred, i1 true, !prof !15 + %v3 = and i1 %v0, %v2 + br i1 %v3, label %bb0, label %bb1, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %j0 = load i32, i32* %j + %v6 = and i32 %i0, 2 + %v4 = icmp eq i32 %v6, %j0 + %v8 = add i32 %sum0, 43 + %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 + %v5 = icmp eq i32 %i0, %sum2 + %sum3 = select i1 %v5, i32 %sum2, i32 %v8, !prof !15 + call void @foo() + %v9 = and i32 %i0, 4 + %v10 = icmp eq i32 %v9, 0 + br i1 %v10, label %bb3, label %bb2 + +bb2: + call void @foo() + br label %bb3 + +bb3: + %v11 = add i32 %i0, %sum3 + ret i32 %v11 +} + +; With an existing phi at the exit but a value (%v40) is both alive and is an +; operand to a phi at the exit block. +; Roughly, +; t0 = *i +; if ((t0 & 1) != 0) // Likely true +; foo() +; v40 = t0 + 44 +; if ((t0 & 2) != 0) // Likely true +; v41 = t0 + 99 +; foo() +; } +; v42 = phi v40, v41 +; return v42 + v40 +; -> +; t0 = *i +; if ((t0 & 3) == 3) // Likely true +; foo() +; v40 = t0 + 44 +; v41 = t0 + 99 +; foo() +; } else { +; if ((t0 & 1) != 0) // Likely true +; foo() +; v40_nc = t0 + 44 +; if ((t0 & 2) != 0) // Likely true +; v41_nc = t0 + 99 +; foo() +; } +; } +; t7 = phi v40, v40_nc +; v42 = phi v41, v41_nc +; v43 = v42 + t7 +; return v43 +define i32 @test_chr_16(i32* %i) !prof !14 { +; CHECK-LABEL: @test_chr_16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[V40:%.*]] = add i32 [[TMP0]], 44 +; CHECK-NEXT: [[V41:%.*]] = add i32 [[TMP0]], 99 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[BB1_NONCHR:%.*]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB1_NONCHR]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[V40_NONCHR:%.*]] = add i32 [[TMP0]], 44 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB2_NONCHR:%.*]], !prof !16 +; CHECK: bb2.nonchr: +; CHECK-NEXT: [[V41_NONCHR:%.*]] = add i32 [[TMP0]], 99 +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[V40]], [[BB0]] ], [ [[V40_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[V42:%.*]] = phi i32 [ [[V41]], [[BB0]] ], [ [[V41_NONCHR]], [[BB2_NONCHR]] ], [ [[V40_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: [[V43:%.*]] = add i32 [[V42]], [[TMP7]] +; CHECK-NEXT: ret i32 [[V43]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 1 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + br label %bb1 + +bb1: + %v40 = add i32 %0, 44 + %3 = and i32 %0, 2 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %bb3, label %bb2, !prof !15 + +bb2: + %v41 = add i32 %0, 99 + call void @foo() + br label %bb3 + +bb3: + %v42 = phi i32 [ %v41, %bb2 ], [ %v40, %bb1 ] + %v43 = add i32 %v42, %v40 + ret i32 %v43 +} + +; Two consecutive regions have an entry in the middle of them. No CHR happens. +; Roughly, +; if ((i & 4) == 0) { +; if (!j) +; goto bb1 +; } else { +; t0 = (i & 1) +; if (t0 != 0) // Likely true +; foo() +; s = (i & 1) + i +; } +; bb1: +; p = phi i, t0, s +; if ((i & 2) != 0) // Likely true +; foo() +; q = p + 2 +; } +; r = phi p, q, i +; return r +; -> +; (no change) +define i32 @test_chr_17(i32 %i, i1 %j) !prof !14 { +; CHECK-LABEL: @test_chr_17( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V0:%.*]] = and i32 [[I:%.*]], 4 +; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[V0]], 0 +; CHECK-NEXT: br i1 [[V1]], label [[BBE:%.*]], label [[BBQ:%.*]] +; CHECK: bbq: +; CHECK-NEXT: br i1 [[J:%.*]], label [[BB3:%.*]], label [[BB1:%.*]] +; CHECK: bbe: +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB1]], label [[BB0:%.*]], !prof !16 +; CHECK: bb0: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[S:%.*]] = add i32 [[TMP0]], [[I]] +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[I]], [[BBQ]] ], [ [[TMP0]], [[BBE]] ], [ [[S]], [[BB0]] ] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[I]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB3]], label [[BB2:%.*]], !prof !16 +; CHECK: bb2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: [[Q:%.*]] = add i32 [[P]], [[TMP2]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[P]], [[BB1]] ], [ [[Q]], [[BB2]] ], [ [[I]], [[BBQ]] ] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %v0 = and i32 %i, 4 + %v1 = icmp eq i32 %v0, 0 + br i1 %v1, label %bbe, label %bbq + +bbq: + br i1 %j, label %bb3, label %bb1 + +bbe: + %0 = and i32 %i, 1 + %1 = icmp eq i32 %0, 0 + br i1 %1, label %bb1, label %bb0, !prof !15 + +bb0: + call void @foo() + %s = add i32 %0, %i + br label %bb1 + +bb1: + %p = phi i32 [ %i, %bbq ], [ %0, %bbe ], [ %s, %bb0 ] + %2 = and i32 %i, 2 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %bb3, label %bb2, !prof !15 + +bb2: + call void @foo() + %q = add i32 %p, %2 + br label %bb3 + +bb3: + %r = phi i32 [ %p, %bb1 ], [ %q, %bb2 ], [ %i, %bbq ] + ret i32 %r +} + +; Select + br, there's a loop and we need to update the user of an inserted phi +; at the entry block. This is a regression test for a bug that's fixed. +; Roughly, +; do { +; inc1 = phi inc2, 0 +; li = *i +; sum1 = sum0 + 42 +; sum2 = ((li & 1) == 0) ? sum0 : sum1 // Likely false +; inc2 = inc1 + 1 +; if ((li & 4) != 0) // Likely true +; sum3 = sum2 + 44 +; sum4 = phi sum1, sum3 +; } while (inc2 != 100) // Likely true (loop back) +; return sum4 +; -> +; do { +; inc1 = phi tmp2, 0 // The first operand needed to be updated +; li = *i +; sum1 = sum0 + 42 +; if ((li & 5) == 5) { // Likely true +; inc2 = inc1 + 1 +; sum3 = sum0 + 86 +; } else { +; inc2_nc = inc1 + 1 +; if ((li & 4) == 0) +; sum2_nc = ((li & 1) == 0) ? sum0 : sum1 +; sum3_nc = sum2_nc + 44 +; } +; tmp2 = phi inc2, in2c_nc +; sum4 = phi sum3, sum3_nc, sum1 +; } while (tmp2 != 100) +; return sum4 +define i32 @test_chr_18(i32* %i, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_18( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP2:%.*]], [[BB2:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LI:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[SUM0:%.*]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[LI]], 5 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 5 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0.split: +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1 +; CHECK-NEXT: [[SUM3:%.*]] = add i32 [[SUM0]], 86 +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb0.split.nonchr: +; CHECK-NEXT: [[A4_NONCHR:%.*]] = and i32 [[LI]], 4 +; CHECK-NEXT: [[CMP4_NONCHR:%.*]] = icmp eq i32 [[A4_NONCHR]], 0 +; CHECK-NEXT: [[INC2_NONCHR:%.*]] = add i32 [[INC1]], 1 +; CHECK-NEXT: br i1 [[CMP4_NONCHR]], label [[BB2]], label [[BB1_NONCHR:%.*]], !prof !16 +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[A1:%.*]] = and i32 [[LI]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A1]], 0 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[CMP1]], i32 [[SUM0]], i32 [[SUM1]], !prof !16 +; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44 +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP2]] = phi i32 [ [[INC2]], [[BB0_SPLIT]] ], [ [[INC2_NONCHR]], [[BB1_NONCHR]] ], [ [[INC2_NONCHR]], [[BB0_SPLIT_NONCHR]] ] +; CHECK-NEXT: [[SUM4:%.*]] = phi i32 [ [[SUM3]], [[BB0_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM1]], [[BB0_SPLIT_NONCHR]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 100 +; CHECK-NEXT: br i1 [[CMP]], label [[BB3:%.*]], label [[BB0]], !prof !16 +; CHECK: bb3: +; CHECK-NEXT: ret i32 [[SUM4]] +; +entry: + br label %bb0 + +bb0: + %inc1 = phi i32 [ %inc2, %bb2 ], [ 0, %entry ] + %li = load i32, i32* %i + %a1 = and i32 %li, 1 + %cmp1 = icmp eq i32 %a1, 0 + %sum1 = add i32 %sum0, 42 + %sum2 = select i1 %cmp1, i32 %sum0, i32 %sum1, !prof !15 + %a4 = and i32 %li, 4 + %cmp4 = icmp eq i32 %a4, 0 + %inc2 = add i32 %inc1, 1 + br i1 %cmp4, label %bb2, label %bb1, !prof !15 + +bb1: + %sum3 = add i32 %sum2, 44 + br label %bb2 + +bb2: + %sum4 = phi i32 [ %sum1, %bb0 ], [ %sum3, %bb1 ] + %cmp = icmp eq i32 %inc2, 100 + br i1 %cmp, label %bb3, label %bb0, !prof !15 + +bb3: + ret i32 %sum4 +} + + +; Selects + Brs. Those share the condition value, which causes the +; targets/operands of the branch/select to be flipped. +; Roughly, +; t0 = *i +; if ((t0 & 255) != 0) { // Likely true +; sum1 = ((t0 & 1) == 0) ? sum0 : (sum0 + 42) // Likely false +; sum2 = ((t0 & 1) == 0) ? sum1 : (sum1 + 42) // Likely false +; if ((t0 & 1) != 0) { // Likely true +; sum3 = sum2 + 44 +; sum4 = ((t0 & 8) == 0) ? sum3 : (sum3 + 44) // Likely false +; } +; sum5 = phi sum2, sum4 +; } +; sum6 = phi sum0, sum5 +; return sum6 +; -> +; t0 = *i +; if ((t0 & 9) == 9) { // Likely true +; tmp3 = sum0 + 85 // Dead +; tmp4 = sum0 + 173 +; } else { +; if ((t0 & 255) != 0) { +; sum2_nc = ((t0 & 1) == 0) ? sum0 : (sum0 + 85) +; sum4_nc_v = ((t0 & 8) == 0) ? 44 : 88 +; sum4_nc = add sum2_nc + sum4_nc_v +; } +; } +; sum6 = phi tmp4, sum0, sum2_nc, sum4_nc +; return sum6 +define i32 @test_chr_19(i32* %i, i32 %sum0) !prof !14 { +; CHECK-LABEL: @test_chr_19( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 9 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9 +; CHECK-NEXT: br i1 [[TMP2]], label [[BB0:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0: +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUM0:%.*]], 85 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SUM0]], 173 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP0]], 255 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[BB3]], label [[BB0_NONCHR:%.*]], !prof !16 +; CHECK: bb0.nonchr: +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[SUM0]], 85 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[TMP8]], i32 [[SUM0]], i32 [[TMP9]], !prof !16 +; CHECK-NEXT: br i1 [[TMP8]], label [[BB3]], label [[BB1_NONCHR:%.*]], !prof !16 +; CHECK: bb1.nonchr: +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: [[SUM4_NONCHR_V:%.*]] = select i1 [[TMP11]], i32 44, i32 88, !prof !16 +; CHECK-NEXT: [[SUM4_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], [[SUM4_NONCHR_V]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SUM6:%.*]] = phi i32 [ [[TMP4]], [[BB0]] ], [ [[SUM0]], [[ENTRY_SPLIT_NONCHR]] ], [ [[SUM2_NONCHR]], [[BB0_NONCHR]] ], [ [[SUM4_NONCHR]], [[BB1_NONCHR]] ] +; CHECK-NEXT: ret i32 [[SUM6]] +; +entry: + %0 = load i32, i32* %i + %1 = and i32 %0, 255 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb3, label %bb0, !prof !15 + +bb0: + %3 = and i32 %0, 1 + %4 = icmp eq i32 %3, 0 + %5 = add i32 %sum0, 42 + %sum1 = select i1 %4, i32 %sum0, i32 %5, !prof !15 + %6 = add i32 %sum1, 43 + %sum2 = select i1 %4, i32 %sum1, i32 %6, !prof !15 + br i1 %4, label %bb2, label %bb1, !prof !15 + +bb1: + %sum3 = add i32 %sum2, 44 + %7 = and i32 %0, 8 + %8 = icmp eq i32 %7, 0 + %9 = add i32 %sum3, 44 + %sum4 = select i1 %8, i32 %sum3, i32 %9, !prof !15 + br label %bb2 + +bb2: + %sum5 = phi i32 [ %sum2, %bb0 ], [ %sum4, %bb1 ] + br label %bb3 + +bb3: + %sum6 = phi i32 [ %sum0, %entry ], [ %sum5, %bb2 ] + ret i32 %sum6 +} + +; Selects. The exit block, which belongs to the top-level region, has a select +; and causes the top-level region to be the outermost CHR scope with the +; subscope that includes the entry block with two selects. The outermost CHR +; scope doesn't see the selects in the entry block as the entry block is in the +; subscope and incorrectly sets the CHR hoist point to the branch rather than +; the first select in the entry block and causes the CHR'ed selects ("select i1 +; false...") to incorrectly position above the CHR branch. This is testing +; against a quirk of how the region analysis handles the entry block. +; Roughly, +; i0 = *i +; sum2 = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) // Likely false +; sum3 = ((i0 & 4) == 0) ? sum2 : (sum2 + 44) // Likely false +; if (j) +; foo() +; i5 = *i +; v13 = (i5 == 44) ? i5 : sum3 +; return v13 +; -> +; i0 = *i +; if ((i0 & 6) != 6) { // Likely true +; v9 = sum0 + 87 +; if (j) +; foo() +; } else { +; sum2.nc = ((i0 & 2) == 0) ? sum0 : (sum0 + 43) +; sum3.nc = ((i0 & 4) == 0) ? sum2.nc : (sum2.nc + 44) +; if (j) +; foo() +; } +; t2 = phi v9, sum3.nc +; i5 = *i +; v13 = (i5 == 44) ? 44 : t2 +; return v13 +define i32 @test_chr_20(i32* %i, i32 %sum0, i1 %j) !prof !14 { +; CHECK-LABEL: @test_chr_20( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[I0]], 6 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 6 +; CHECK-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: entry.split: +; CHECK-NEXT: [[V9:%.*]] = add i32 [[SUM0:%.*]], 87 +; CHECK-NEXT: br i1 [[J:%.*]], label [[BB1:%.*]], label [[BB4:%.*]] +; CHECK: bb1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB4]] +; CHECK: entry.split.nonchr: +; CHECK-NEXT: [[V8:%.*]] = add i32 [[SUM0]], 43 +; CHECK-NEXT: [[V3:%.*]] = and i32 [[I0]], 2 +; CHECK-NEXT: [[V4:%.*]] = icmp eq i32 [[V3]], 0 +; CHECK-NEXT: [[SUM2_NONCHR:%.*]] = select i1 [[V4]], i32 [[SUM0]], i32 [[V8]], !prof !16 +; CHECK-NEXT: [[V6_NONCHR:%.*]] = and i32 [[I0]], 4 +; CHECK-NEXT: [[V5_NONCHR:%.*]] = icmp eq i32 [[V6_NONCHR]], 0 +; CHECK-NEXT: [[V9_NONCHR:%.*]] = add i32 [[SUM2_NONCHR]], 44 +; CHECK-NEXT: [[SUM3_NONCHR:%.*]] = select i1 [[V5_NONCHR]], i32 [[SUM2_NONCHR]], i32 [[V9_NONCHR]], !prof !16 +; CHECK-NEXT: br i1 [[J]], label [[BB1_NONCHR:%.*]], label [[BB4]] +; CHECK: bb1.nonchr: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BB4]] +; CHECK: bb4: +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[V9]], [[BB1]] ], [ [[V9]], [[ENTRY_SPLIT]] ], [ [[SUM3_NONCHR]], [[BB1_NONCHR]] ], [ [[SUM3_NONCHR]], [[ENTRY_SPLIT_NONCHR]] ] +; CHECK-NEXT: [[I5:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[V12:%.*]] = icmp eq i32 [[I5]], 44 +; CHECK-NEXT: [[V13:%.*]] = select i1 [[V12]], i32 44, i32 [[TMP2]], !prof !16 +; CHECK-NEXT: ret i32 [[V13]] +; +entry: + %i0 = load i32, i32* %i + %v3 = and i32 %i0, 2 + %v4 = icmp eq i32 %v3, 0 + %v8 = add i32 %sum0, 43 + %sum2 = select i1 %v4, i32 %sum0, i32 %v8, !prof !15 + %v6 = and i32 %i0, 4 + %v5 = icmp eq i32 %v6, 0 + %v9 = add i32 %sum2, 44 + %sum3 = select i1 %v5, i32 %sum2, i32 %v9, !prof !15 + br i1 %j, label %bb1, label %bb4 + +bb1: + call void @foo() + br label %bb4 + +bb4: + %i5 = load i32, i32* %i + %v12 = icmp eq i32 %i5, 44 + %v13 = select i1 %v12, i32 %i5, i32 %sum3, !prof !15 + ret i32 %v13 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} + +!14 = !{!"function_entry_count", i64 100} +!15 = !{!"branch_weights", i32 0, i32 1} +!16 = !{!"branch_weights", i32 1, i32 1} +; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0} +; CHECK: !16 = !{!"branch_weights", i32 0, i32 1} +; CHECK: !17 = !{!"branch_weights", i32 1, i32 1} +; CHECK: !18 = !{!"branch_weights", i32 1, i32 0} +; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}