diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -24,6 +24,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" @@ -94,16 +96,26 @@ } } +static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) { + auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { + return X.getSectionID().Type < Y.getSectionID().Type; + }; + llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); + llvm::avoidZeroOffsetLandingPad(MF); +} + static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, bool HasAccurateProfile) { std::optional Count = MBFI->getBlockProfileCount(&MBB); + // If using accurate profile, no count means cold. + // If no accurate profile, no count means "do not judge + // coldness". if (!Count) - return true; + return HasAccurateProfile; - if (PercentileCutoff > 0) { + if (PercentileCutoff > 0) return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); - } return (*Count < ColdCountThreshold); } @@ -140,9 +152,28 @@ MachineBlockFrequencyInfo *MBFI = nullptr; ProfileSummaryInfo *PSI = nullptr; + // Whether this pass is using FSAFDO profile (not accurate) or IRPGO + // (accurate). HasAccurateProfile is only used when UseProfileData is true, + // but giving it a default value to silent any possible warning. + bool HasAccurateProfile = false; if (UseProfileData) { MBFI = &getAnalysis(); PSI = &getAnalysis().getPSI(); + // "HasAccurateProfile" is false for FSAFDO, true when using IRPGO + // (traditional instrumented FDO) or CSPGO profiles. + HasAccurateProfile = + PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile(); + // If HasAccurateProfile is false, we only trust hot functions, + // which have many samples, and consider them as split + // candidates. On the other hand, if HasAccurateProfile (likeIRPGO), we + // trust both cold and hot functions. + if (!HasAccurateProfile && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) { + // Split all EH code and it's descendant statically by default. + if (SplitAllEHCode) + setDescendantEHBlocksCold(MF); + finishAdjustingBasicBlocksAndLandingPads(MF); + return true; + } } SmallVector LandingPads; @@ -152,7 +183,8 @@ if (MBB.isEHPad()) LandingPads.push_back(&MBB); - else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode) + else if (UseProfileData && + isColdBlock(MBB, MBFI, PSI, HasAccurateProfile) && !SplitAllEHCode) MBB.setSectionID(MBBSectionID::ColdSectionID); } @@ -161,9 +193,10 @@ setDescendantEHBlocksCold(MF); // We only split out eh pads if all of them are cold. else { + // Here we have UseProfileData == true. bool HasHotLandingPads = false; for (const MachineBasicBlock *LP : LandingPads) { - if (!isColdBlock(*LP, MBFI, PSI)) + if (!isColdBlock(*LP, MBFI, PSI, HasAccurateProfile)) HasHotLandingPads = true; } if (!HasHotLandingPads) { @@ -171,11 +204,8 @@ LP->setSectionID(MBBSectionID::ColdSectionID); } } - auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { - return X.getSectionID().Type < Y.getSectionID().Type; - }; - llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); - llvm::avoidZeroOffsetLandingPad(MF); + + finishAdjustingBasicBlocksAndLandingPads(MF); return true; } diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll --- a/llvm/test/CodeGen/X86/machine-function-splitter.ll +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -2,6 +2,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefix=MFS-EH-SPLIT +; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll +; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS +; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS2 + define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { ;; Check that cold block is moved to .text.split. ; MFS-DEFAULTS-LABEL: foo1 @@ -368,6 +372,66 @@ ret i32 %8 } +define void @foo14(i1 zeroext %0, i1 zeroext %1) nounwind !prof !24 { +; FSAFDO-MFS: .section .text.split.foo14,"ax" +; FSAFDO-MFS: foo14.cold: + br i1 %0, label %3, label %7, !prof !25 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10, !prof !26 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + +define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 { +;; HasAccurateProfile is false, foo15 is hot, but no profile data for +;; blocks, no split should happen. +; FSAFDO-MFS2-NOT: .section .text.split.foo15,"ax" +; FSAFDO-MFS2-NOT: foo15.cold: + br i1 %0, label %3, label %7 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + declare i32 @bar() declare i32 @baz() declare i32 @bam() @@ -404,3 +468,7 @@ !21 = !{!"branch_weights", i32 6000, i32 4000} !22 = !{!"branch_weights", i32 80, i32 9920} !23 = !{!"function_entry_count", i64 7} +!24 = !{!"function_entry_count", i64 10000} +!25 = !{!"branch_weights", i32 0, i32 7000} +!26 = !{!"branch_weights", i32 1000, i32 6000} +!27 = !{!"function_entry_count", i64 10000}