diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -58,9 +58,12 @@ /// basic blocks and is enabled with -fbasic-block-sections. MachineFunctionPass *createBasicBlockSectionsPass(); - /// createMachineFunctionSplitterPass - This pass splits machine functions - /// using profile information. - MachineFunctionPass *createMachineFunctionSplitterPass(); + /// createMachineFunctionSplitterPass - This pass splits machine + /// functions using profile information. Set "HasAccurateProfile" + /// to false when using FSAFDO or CSSPGO, set to true when IRPGO + /// (traditional instrumented FDO) profile is used. + MachineFunctionPass * + createMachineFunctionSplitterPass(bool HasAccurateProfile = true); /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -24,7 +24,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHUtils.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -69,7 +72,8 @@ class MachineFunctionSplitter : public MachineFunctionPass { public: static char ID; - MachineFunctionSplitter() : MachineFunctionPass(ID) { + MachineFunctionSplitter(bool HasAccurateProfile) + : MachineFunctionPass(ID), HasAccurateProfile(HasAccurateProfile) { initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); } @@ -80,6 +84,11 @@ void getAnalysisUsage(AnalysisUsage 
&AU) const override; bool runOnMachineFunction(MachineFunction &F) override; + +private: + // Whether this pass is using FSAFDO profile (not accurate) or IRPGO + // (accurate). + bool HasAccurateProfile = true; }; } // end anonymous namespace @@ -94,11 +103,23 @@ } } +static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) { + auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { + return X.getSectionID().Type < Y.getSectionID().Type; + }; + llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); + llvm::avoidZeroOffsetLandingPad(MF); +} + static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, bool HasAccurateProfile) { std::optional Count = MBFI->getBlockProfileCount(&MBB); - if (!Count) + if (!HasAccurateProfile) { + return Count.has_value() && Count.value() < ColdCountThreshold; + } + + if (!Count.has_value()) return true; if (PercentileCutoff > 0) { @@ -143,6 +164,18 @@ if (UseProfileData) { MBFI = &getAnalysis(); PSI = &getAnalysis().getPSI(); + // If we do not have AccurateProfile, we only trust hot + // functions, which have many samples, and consider them as + // split candidates. On the other hand, if we do have + // AccurateProfile (like IRPGO), we trust both cold and hot + // functions. + if (!HasAccurateProfile && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) { + // Split all EH code and its descendants statically by default. 
+ if (SplitAllEHCode) + setDescendantEHBlocksCold(MF); + finishAdjustingBasicBlocksAndLandingPads(MF); + return true; + } } SmallVector LandingPads; @@ -152,7 +185,8 @@ if (MBB.isEHPad()) LandingPads.push_back(&MBB); - else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode) + else if (UseProfileData && + isColdBlock(MBB, MBFI, PSI, HasAccurateProfile) && !SplitAllEHCode) MBB.setSectionID(MBBSectionID::ColdSectionID); } @@ -163,7 +197,7 @@ else { bool HasHotLandingPads = false; for (const MachineBasicBlock *LP : LandingPads) { - if (!isColdBlock(*LP, MBFI, PSI)) + if (!isColdBlock(*LP, MBFI, PSI, HasAccurateProfile)) HasHotLandingPads = true; } if (!HasHotLandingPads) { @@ -171,15 +205,13 @@ LP->setSectionID(MBBSectionID::ColdSectionID); } } - auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { - return X.getSectionID().Type < Y.getSectionID().Type; - }; - llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); - llvm::avoidZeroOffsetLandingPad(MF); + + finishAdjustingBasicBlocksAndLandingPads(MF); return true; } void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -190,6 +222,10 @@ "Split machine functions using profile information", false, false) -MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { - return new MachineFunctionSplitter(); +// Set "HasAccurateProfile" to false when FSAFDO or CSSPGO profiles +// are used. Set to true when using IRPGO (traditional instrumented +// FDO) profiles. 
+MachineFunctionPass * +llvm::createMachineFunctionSplitterPass(bool HasAccurateProfile) { + return new MachineFunctionSplitter(HasAccurateProfile); } diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -2,6 +2,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefix=MFS-EH-SPLIT +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -enable-fs-discriminator -improved-fs-discriminator=true | FileCheck %s --check-prefix=FSAFDO-MFS + define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { ;; Check that cold block is moved to .text.split. 
; MFS-DEFAULTS-LABEL: foo1 @@ -9,6 +11,9 @@ ; MFS-DEFAULTS-NEXT: foo1.cold: ; MFS-DEFAULTS-NOT: callq bar ; MFS-DEFAULTS-NEXT: callq baz +; FSAFDO-MFS: .section .text.split.foo15,"ax" +; FSAFDO-MFS: foo15.cold: + br i1 %0, label %2, label %4, !prof !17 2: ; preds = %1 @@ -368,6 +373,33 @@ ret i32 %8 } +define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !24 { + br i1 %0, label %3, label %7, !prof !25 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10, !prof !26 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() @@ -404,3 +436,6 @@ !21 = !{!"branch_weights", i32 6000, i32 4000} !22 = !{!"branch_weights", i32 80, i32 9920} !23 = !{!"function_entry_count", i64 7} +!24 = !{!"function_entry_count", i64 10000} +!25 = !{!"branch_weights", i32 0, i32 7000} +!26 = !{!"branch_weights", i32 1000, i32 6000} \ No newline at end of file