diff --git a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
--- a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -41,7 +41,7 @@
   bool run(Module &M);
 
 private:
-  bool isFunctionCold(const Function &F) const;
+  bool isFunctionCold(Function &F, bool HasProfileSummary) const;
   bool shouldOutlineFrom(const Function &F) const;
   bool outlineColdRegions(Function &F, bool HasProfileSummary);
   Function *extractColdRegion(const BlockSequence &Region,
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -83,6 +83,14 @@
     SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
                        cl::desc("Base penalty for splitting cold code (as a "
                                 "multiple of TCC_Basic)"));
+
+static cl::opt<bool> EnableRandomOutlining(
+    "hot-cold-randomly-outline-cold-code", cl::init(true), cl::Hidden,
+    cl::desc("Randomly treat blocks in diamond-shaped control flow as cold"));
+
+static cl::opt<bool> EnableDeterministicRandomOutlining(
+    "hot-cold-deterministic-random-outline-code", cl::init(true), cl::Hidden,
+    cl::desc("Seed the RNG used for random outlining with a fixed value"));
 
 namespace {
 // Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
@@ -100,28 +108,72 @@
   return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
 }
 
-bool unlikelyExecuted(BasicBlock &BB) {
+/// If \p BB has exactly one successor and exactly one predecessor, return them
+/// as a (successor, predecessor) pair; otherwise return None.
+static Optional<std::pair<const BasicBlock *, const BasicBlock *>>
+hasSingleSuccAndPred(const BasicBlock &BB) {
+  auto FirstSucc = succ_begin(&BB);
+  auto SuccEnd = succ_end(&BB);
+  if (FirstSucc == SuccEnd || ++FirstSucc != SuccEnd)
+    return None;
+  auto FirstPred = pred_begin(&BB);
+  auto PredEnd = pred_end(&BB);
+  if (FirstPred == PredEnd || ++FirstPred != PredEnd)
+    return None;
+  return Optional<std::pair<const BasicBlock *, const BasicBlock *>>(
+      {*succ_begin(&BB), *pred_begin(&BB)});
+}
+
+bool unlikelyExecuted(const BasicBlock &BB) {
   // Exception handling blocks are unlikely executed.
   if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
     return true;
 
   // The block is cold if it calls/invokes a cold function. However, do not
   // mark sanitizer traps as cold.
-  for (Instruction &I : BB)
-    if (auto CS = CallSite(&I))
+  for (const Instruction &I : BB)
+    if (const auto CS = ImmutableCallSite(&I))
       if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize"))
         return true;
 
   // The block is cold if it has an unreachable terminator, unless it's
   // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp).
   if (blockEndsInUnreachable(BB)) {
-    if (auto *CI =
+    if (const auto *CI =
             dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
-      if (CI->hasFnAttr(Attribute::NoReturn))
-        return false;
+      if (CI->hasFnAttr(Attribute::NoReturn)) {
+        // Only longjmp-like callees are treated as possibly warm. Indirect
+        // calls have no known callee, so stay conservative for them.
+        const Function *Callee = CI->getCalledFunction();
+        if (!Callee || Callee->getName().startswith("longjmp"))
+          return false;
+      }
     return true;
   }
 
+  if (!EnableRandomOutlining)
+    return false;
+
+  // Game of chance hypothesis: when most (80:20 rule) of the code is cold,
+  // a randomly selected basic block has a higher chance of being cold. Do this
+  // if the basic block is part of a diamond structure (if-else).
+  // NB: This causes non-deterministic outlining if rng is not seeded.
+  if (auto SuccPred = hasSingleSuccAndPred(BB)) {
+    auto Succ = SuccPred->first;
+    int SuccPredCount = pred_size(Succ);
+    if (SuccPredCount < 2)
+      return false;
+    auto Pred = SuccPred->second;
+    int PredSuccCount = succ_size(Pred);
+    if (PredSuccCount < 2)
+      return false;
+    double Chance = (std::rand() % PredSuccCount) / double(PredSuccCount);
+    // Threshold can be increased to enable more aggressive outlining.
+    if (Chance < 0.5) {
+      LLVM_DEBUG(dbgs() << "Randomly making cold: " << BB);
+      return true;
+    }
+  }
   return false;
 }
 
@@ -159,7 +211,14 @@
     F.setEntryCount(0);
     Changed = true;
   }
 
+  // Setting the section prefix modifies F, so record it in Changed.
+  Optional<StringRef> Prefix = F.getSectionPrefix();
+  if (!Prefix || *Prefix != ".unlikely") {
+    F.setSectionPrefix(".unlikely");
+    Changed = true;
+  }
+
   return Changed;
 }
 
@@ -183,7 +242,8 @@
 } // end anonymous namespace
 
 /// Check whether \p F is inherently cold.
-bool HotColdSplitting::isFunctionCold(const Function &F) const {
+bool HotColdSplitting::isFunctionCold(Function &F,
+                                      bool HasProfileSummary) const {
   if (F.hasFnAttribute(Attribute::Cold))
     return true;
 
@@ -193,6 +253,12 @@
   if (PSI->isFunctionEntryCold(&F))
     return true;
 
+  if (HasProfileSummary) {
+    auto BFI = GetBFI(F);
+    if (BFI)
+      return PSI->isFunctionColdInCallGraph(&F, *BFI);
+  }
+
   return false;
 }
 
@@ -648,7 +714,7 @@
       continue;
 
     // Detect inherently cold functions and mark them as such.
-    if (isFunctionCold(F)) {
+    if (isFunctionCold(F, HasProfileSummary)) {
       Changed |= markFunctionCold(F);
       continue;
     }
@@ -667,6 +733,9 @@
 bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
   if (skipModule(M))
     return false;
+  // Initialize rng for deterministic aggressive outlining.
+  if (EnableDeterministicRandomOutlining)
+    std::srand(1);
   ProfileSummaryInfo *PSI =
       &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   auto GTTI = [this](Function &F) -> TargetTransformInfo & {
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -98,7 +98,7 @@
     EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
                          cl::desc("Enable performing ThinLTO."));
 
-cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
+cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(true), cl::Hidden,
     cl::desc("Enable hot-cold splitting pass"));
 
 static cl::opt<bool> UseLoopVersioningLICM(
@@ -754,7 +754,7 @@
 
   // See comment in the new PM for justification of scheduling splitting at
   // this stage (\ref buildModuleSimplificationPipeline).
-  if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
+  if (EnableHotColdSplit)
     MPM.add(createHotColdSplittingPass());
 
   if (MergeFunctions)