Index: include/llvm/CodeGen/PagerandoBinning.h =================================================================== --- /dev/null +++ include/llvm/CodeGen/PagerandoBinning.h @@ -0,0 +1,57 @@ +//===-- PagerandoBinning.h - Binning for Pagerando ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass assigns Pagerando-enabled functions to bins. See the implementation +// file for a description of the algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PAGERANDOBINNING_H +#define LLVM_CODEGEN_PAGERANDOBINNING_H + +#include "llvm/Pass.h" +#include + +namespace llvm { + +class PagerandoBinnerBase : public ModulePass { +public: + explicit PagerandoBinnerBase(char &ID); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override; + + typedef unsigned Bin; + static constexpr unsigned MinFnSize = 2; // 'bx lr' on ARM thumb + static constexpr auto SectionPrefix = ".bin_"; + + bool runOnModule(Module &M) override; + + class FirstFitAlgo { + public: + Bin assignToBin(unsigned FnSize); + + private: + // bin numbers> + std::multimap Bins; + unsigned BinCount = 1; + }; + +protected: + virtual bool initializeBinning(Module &M); + virtual Bin getBinAssignment(Function &F) = 0; + + unsigned estimateFunctionSize(const Function &F); + +private: + static void setBin(Function &F, Bin Bin); +}; + +} // end namespace llvm + +#endif Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -367,6 +367,9 @@ /// LiveDebugValues pass extern char &LiveDebugValuesID; + /// PagerandoBinning - This pass assigns MachineFunctions to Pagerando bins. + ModulePass *createPagerandoBinningPass(); + /// createJumpInstrTables - This pass creates jump-instruction tables. ModulePass *createJumpInstrTablesPass(); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -292,6 +292,7 @@ void initializePAEvalPass(PassRegistry&); void initializePagerandoWrappersPass(PassRegistry&); void initializePEIPass(PassRegistry&); +void initializePGOBinnerPass(PassRegistry&); void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&); void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&); void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&); @@ -361,6 +362,7 @@ void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); +void initializeSimpleBinnerPass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -98,6 +98,7 @@ MIRPrintingPass.cpp MacroFusion.cpp OptimizePHIs.cpp + PagerandoBinning.cpp ParallelCG.cpp PeepholeOptimizer.cpp PHIElimination.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -72,6 +72,7 @@ initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); initializeOptimizePHIsPass(Registry); + initializePGOBinnerPass(Registry); initializePEIPass(Registry); initializePHIEliminationPass(Registry); initializePatchableFunctionPass(Registry); @@ -92,6 +93,7 @@ initializeSafeStackLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); + initializeSimpleBinnerPass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); Index: lib/CodeGen/PagerandoBinning.cpp =================================================================== --- /dev/null +++ lib/CodeGen/PagerandoBinning.cpp @@ -0,0 +1,343 @@ +//===-- PagerandoBinning.cpp - Binning for Pagerando ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass assigns Pagerando-enabled functions to bins. Normal functions +// (and currently also Pagerando wrappers) are not assigned to a bin. The bin +// size is 4KB. +// Function sizes are estimated by adding up the size of all instructions +// of the corresponding MachineFunction. To improve estimate accuracy this pass +// should run as late as possible, but must run before the Pagerando optimizer +// passes (since they rely on bin assignments). +// +// Binning strategies: +// -) Simple: a greedy algorithm that, for every function, picks the bin with +// the smallest remaining free space that still accommodates the function. If +// such a bin does not exist, a new one is created. Functions that are larger +// than the bin size are assigned to a new bin which forces the expansion of +// said bin. +// -) PGO: Profile-guided bin assignment. The algorithm attempts to bin together +// hot calls into the same bin by repeatedly combining hot functions with their +// caller with the hottest call site. Based on the C3 algorithm presented in +// "Optimizing Function Placement for Large-Scale Data-Center Applications," +// Ottoni and Maher, CGO 2017. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/PagerandoBinning.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "pagerando-binning" + +enum class BStrat { Simple, PGO }; +static cl::opt BinningStrategy( + "pagerando-binning-strategy", cl::Hidden, cl::init(BStrat::Simple), + cl::desc("Binning strategy for Pagerando"), cl::values( + clEnumValN(BStrat::Simple, "simple", "Simple greedy strategy"), + clEnumValN(BStrat::PGO, "pgo", "Profile-guided strategy"))); + +static cl::opt BinSize( + "pagerando-bin-size", cl::Hidden, cl::init(4096), + cl::desc("Target size for pagerando bins")); + + +namespace { + +class SimpleBinner : public PagerandoBinnerBase { +public: + SimpleBinner() : PagerandoBinnerBase(ID) { + initializeSimpleBinnerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification, replacement for typeid + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + Bin getBinAssignment(Function &F) override; + +private: + FirstFitAlgo FitAlgo; +}; + +class PGOBinner : public PagerandoBinnerBase { +public: + PGOBinner() : PagerandoBinnerBase(ID) { + initializePGOBinnerPass(*PassRegistry::getPassRegistry()); + } + + static char ID; // Pass identification, replacement for typeid + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool initializeBinning(Module &M) override; + + Bin getBinAssignment(Function &F) override; + +private: + struct Cluster { + unsigned Size = 0; + SmallVector Functions; + + // Lazily initialized during getBinAssignment phase + PagerandoBinnerBase::Bin Bin = 0; + + Cluster(Function *F, unsigned InitialSize) + : Size(InitialSize), Functions{F} { } + + void merge(Cluster &Other) { + Size += Other.Size; + Functions.insert(Functions.end(), Other.Functions.begin(), Other.Functions.end()); + } + }; + + unsigned BinCount = 1; + + FirstFitAlgo FitAlgo; + + ProfileSummaryInfo *PSI; + bool HaveProfileInfo = false; + + using CallerWeights = DenseMap; + + std::map RevCG; + DenseMap> FnToCluster; + + std::shared_ptr getCluster(Function *F); + void mergeClusters(std::shared_ptr C1, std::shared_ptr C2); + + void createReverseWeightedCallGraph(CallGraph &CG); +}; + +} // end anonymous namespace + + +PagerandoBinnerBase::PagerandoBinnerBase(char &ID) : ModulePass(ID) {} + +void PagerandoBinnerBase::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); + ModulePass::getAnalysisUsage(AU); +} + +bool PagerandoBinnerBase::initializeBinning(Module &M) { + return false; +} + +bool PagerandoBinnerBase::runOnModule(Module &M) { + bool Modified = initializeBinning(M); + + for (auto &F : M) { + if (F.isPagerando()) { + Bin B = getBinAssignment(F); + setBin(F, B); + Modified = true; + } + } + return Modified; +} + +void PagerandoBinnerBase::setBin(Function &F, Bin Bin) { + // Note: overwrites an existing section prefix + F.setSectionPrefix(SectionPrefix + utostr(Bin)); +} + +unsigned PagerandoBinnerBase::estimateFunctionSize(const Function &F) { + auto &MF = *getAnalysis().getMachineFunction(F); + auto *TII = MF.getSubtarget().getInstrInfo(); + + unsigned Size = 0; + for (auto &MBB : MF) + for (auto &MI : MBB) + Size += TII->getInstSizeInBytes(MI); + + return std::max(Size, MinFnSize+0); +} + + +char SimpleBinner::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleBinner, "pagerando-binning-simple", "Simple Function Binning", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineModuleInfo) +INITIALIZE_PASS_END(SimpleBinner, "pagerando-binning-simple", "Simple Function Binning", + false, false) + +PagerandoBinnerBase::Bin SimpleBinner::getBinAssignment(Function &F) { + auto FnSize = estimateFunctionSize(F); + return FitAlgo.assignToBin(FnSize); +} + +void SimpleBinner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + PagerandoBinnerBase::getAnalysisUsage(AU); +} + + +PagerandoBinnerBase::Bin PagerandoBinnerBase::FirstFitAlgo::assignToBin(unsigned FnSize) { + unsigned Bin, FreeSpace; + + auto I = Bins.lower_bound(FnSize); + if (I != Bins.end()) { + std::tie(FreeSpace, Bin) = *I; + FreeSpace -= FnSize; + Bins.erase(I); + } else { // No bin with enough free space + Bin = BinCount++; + auto Size = FnSize % BinSize; + FreeSpace = (Size == 0) ? 0 : (BinSize - Size); + } + + if (FreeSpace >= PagerandoBinnerBase::MinFnSize) + Bins.emplace(FreeSpace, Bin); + + return Bin; +} + + +char PGOBinner::ID = 0; +INITIALIZE_PASS_BEGIN(PGOBinner, "pagerando-binning-pgo", "PGO Function Binning", + false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineModuleInfo) +INITIALIZE_PASS_END(PGOBinner, "pagerando-binning-pgo", "PGO Function Binning", + false, false) + +bool PGOBinner::initializeBinning(Module &M) { + auto &CG = getAnalysis().getCallGraph(); + PSI = getAnalysis().getPSI(); + if (!PSI || !PSI->hasProfileSummary()) { + dbgs() << "No profiling information available. Falling back to simple first-fit\n"; + return false; + } + HaveProfileInfo = true; + + createReverseWeightedCallGraph(CG); + + std::vector Worklist; + for (auto &F : M) { + if (F.isPagerando()) + Worklist.push_back(&F); + } + std::stable_sort(Worklist.begin(), Worklist.end(), + [](Function *F1, Function *F2) { + auto C1 = F1->getEntryCount(); + auto C2 = F2->getEntryCount(); + if (!C1.hasValue()) + return false; + if (!C2.hasValue()) + return true; + return C1.getCount() > C2.getCount(); + }); + + std::vector > WeightedCallers; + for (auto Callee : Worklist) { + auto &WeightMap = RevCG[Callee]; + WeightedCallers.insert(WeightedCallers.end(), WeightMap.begin(), WeightMap.end()); + std::stable_sort(WeightedCallers.begin(), WeightedCallers.end(), + [](std::pair I1, + std::pair I2) { + return I1.second > I2.second; + }); + + // Select a cluster to merge into + auto CalleeCluster = getCluster(Callee); + for (auto I : WeightedCallers) { + Function *Caller = I.first; + auto CallerCluster = getCluster(Caller); + if (CallerCluster->Size + CalleeCluster->Size <= BinSize) { + mergeClusters(CallerCluster, CalleeCluster); + break; + } + } + + WeightedCallers.clear(); + } + + return false; +} + +void PGOBinner::createReverseWeightedCallGraph(CallGraph &CG) { + for (auto &CGI : CG) { + Function *Caller = CGI.second->getFunction(); + if (!Caller) // External node + continue; + if (!Caller->isPagerando()) + continue; + CallerWeights &Weights = RevCG[Caller]; + auto &BFI = getAnalysis(*Caller).getBFI(); + for (auto &CR : *CGI.second) { + Instruction *CallInst = cast(CR.first); + Function *Callee = CR.second->getFunction(); + if (!Callee) // External node + continue; + if (!Callee->isPagerando()) + continue; + auto Count = PSI->getProfileCount(CallInst, &BFI); + if (Count) + Weights[Callee] += *Count; + } + } +} + +std::shared_ptr PGOBinner::getCluster(Function *F) { + auto I = FnToCluster.find(F); + if (I != FnToCluster.end()) + return I->second; + + auto C = std::make_shared(F, estimateFunctionSize(*F)); + auto Insert = FnToCluster.try_emplace(F, C); + return C; +} + +void PGOBinner::mergeClusters(std::shared_ptr C1, std::shared_ptr C2) { + C1->merge(*C2); + for (auto *F : C2->Functions) + FnToCluster[F] = C1; +} + +PagerandoBinnerBase::Bin PGOBinner::getBinAssignment(Function &F) { + if (!HaveProfileInfo) { + // Fall back to simple binning + auto FnSize = estimateFunctionSize(F); + return FitAlgo.assignToBin(FnSize); + } + + auto Cluster = FnToCluster[&F]; + if (Cluster->Bin == 0) + Cluster->Bin = FitAlgo.assignToBin(Cluster->Size); + return Cluster->Bin; +} + +void PGOBinner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + PagerandoBinnerBase::getAnalysisUsage(AU); +} + + +ModulePass *llvm::createPagerandoBinningPass() { + switch (BinningStrategy.getValue()) { + case BStrat::Simple: return new SimpleBinner(); + case BStrat::PGO: return new PGOBinner(); + } + llvm_unreachable("Unexpected binning strategy"); +} Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -833,6 +833,9 @@ if (TM->Options.EnableIPRA) addPass(createRegUsageInfoPropPass()); + if (TM->isPagerando()) + addPass(createPagerandoBinningPass()); + // Run pre-ra passes. addPreRegAlloc(); Index: unittests/CodeGen/CMakeLists.txt =================================================================== --- unittests/CodeGen/CMakeLists.txt +++ unittests/CodeGen/CMakeLists.txt @@ -19,6 +19,7 @@ MachineInstrTest.cpp MachineOperandTest.cpp ScalableVectorMVTsTest.cpp + PagerandoBinningFirstFitTest.cpp ) add_subdirectory(GlobalISel) Index: unittests/CodeGen/PagerandoBinningFirstFitTest.cpp =================================================================== --- /dev/null +++ unittests/CodeGen/PagerandoBinningFirstFitTest.cpp @@ -0,0 +1,88 @@ +//===-- PagerandoBinningSimpleTest.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/PagerandoBinning.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +struct PagerandoBinningFirstFitTest : public testing::Test { + PagerandoBinnerBase::FirstFitAlgo Algo; + + void ASSERT_ASSIGNMENTS( + std::initializer_list> Assignments) { + for (auto &A : Assignments) { + unsigned FnSize, ExpectedBin; + std::tie(FnSize, ExpectedBin) = A; + unsigned Bin = Algo.assignToBin(FnSize); + ASSERT_EQ(Bin, ExpectedBin); + } + } +}; + +TEST_F(PagerandoBinningFirstFitTest, NeverReturnsDefaultBin) { + ASSERT_NE(Algo.assignToBin(100), 0u); +} + +TEST_F(PagerandoBinningFirstFitTest, UsesGreedyAlgorithm) { + ASSERT_ASSIGNMENTS({ + {3000, 1}, + {3000, 2}, + {1000, 1}, + {1000, 2}, + {1000, 3}, + }); +} + +TEST_F(PagerandoBinningFirstFitTest, UsesRemainingFreeSpace) { + ASSERT_ASSIGNMENTS({ + {3000, 1}, + {1000, 1}, + { 100, 2}, + { 90, 1}, + { 6, 1}, + { 1, 2}, + }); +} + +TEST_F(PagerandoBinningFirstFitTest, UsesBinWithLeastFreeSpace) { + ASSERT_ASSIGNMENTS({ + {3000, 1}, + {3001, 2}, + {3000, 3}, + { 100, 2}, + }); +} + +TEST_F(PagerandoBinningFirstFitTest, FreeSpaceMustBeAtLeastMinFnSize) { + ASSERT_ASSIGNMENTS({ + {4095, 1}, + { 1, 2}, + {4095, 2}, + }); +} + +TEST_F(PagerandoBinningFirstFitTest, BinSizedFunctionsAlwaysGetTheirOwnBin) { + ASSERT_ASSIGNMENTS({ + {4096, 1}, + {8192, 2}, + { 1, 3}, + }); +} + +TEST_F(PagerandoBinningFirstFitTest, LargeFunctionsAreStillPacked) { + ASSERT_ASSIGNMENTS({ + {8000, 1}, + { 100, 1}, + }); +} + +} // end anonymous namespace