diff --git a/llvm/include/llvm/CodeGen/MBFIWrapper.h b/llvm/include/llvm/CodeGen/MBFIWrapper.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MBFIWrapper.h @@ -0,0 +1,46 @@ +//===- llvm/CodeGen/MBFIWrapper.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class keeps track of branch frequencies of newly created blocks and +// transformed blocks. Used by the TailDuplication and MachineBlockPlacement. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MBFIWRAPPER_H +#define LLVM_CODEGEN_MBFIWRAPPER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/BlockFrequency.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineBlockFrequencyInfo; + +class MBFIWrapper { + public: + MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} + + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); + raw_ostream &printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const; + raw_ostream &printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const; + void view(const Twine &Name, bool isSimple = true); + uint64_t getEntryFreq() const; + const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } + + private: + const MachineBlockFrequencyInfo &MBFI; + DenseMap MergedBBFreq; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_MBFIWRAPPER_H diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h --- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h +++ b/llvm/include/llvm/CodeGen/MachineSizeOpts.h @@ -21,6 +21,7 @@ class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineFunction; +class MBFIWrapper; /// Returns true if machine function \p MF is suggested to be size-optimized /// based on the profile. @@ -33,6 +34,12 @@ ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType = PGSOQueryType::Other); +/// Returns true if machine basic block \p MBB is suggested to be size-optimized +/// based on the profile. +bool shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIWrapper, + PGSOQueryType QueryType = PGSOQueryType::Other); } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include #include @@ -42,7 +43,7 @@ const MachineModuleInfo *MMI; MachineRegisterInfo *MRI; MachineFunction *MF; - const MachineBlockFrequencyInfo *MBFI; + MBFIWrapper *MBFI; ProfileSummaryInfo *PSI; bool PreRegAlloc; bool LayoutMode; @@ -69,7 +70,7 @@ /// default implies using the command line value TailDupSize. void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, - const MachineBlockFrequencyInfo *MBFI, + MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize = 0); diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -88,6 +88,31 @@ BB, PSI, BFI); } +template +bool shouldOptimizeForSizeImpl(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, BFIT *BFI, + PGSOQueryType QueryType) { + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + if (ForcePGSO) + return true; + if (!EnablePGSO) + return false; + // Temporarily enable size optimizations only for the IR pass or test query + // sites for gradual commit/rollout. This is to be removed later. + if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || + QueryType == PGSOQueryType::Test)) + return false; + if (PGSOColdCodeOnly || + (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { + // Even if the working set size isn't large, size-optimize cold code. + return AdapterT::isColdBlock(BlockFreq, PSI, BFI); + } + return !AdapterT::isHotBlockNthPercentile( + PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf, + BlockFreq, PSI, BFI); +} + /// Returns true if function \p F is suggested to be size-optimized based on the /// profile. bool shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -13,6 +13,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/Compiler.h" #include @@ -34,8 +35,6 @@ class LLVM_LIBRARY_VISIBILITY BranchFolder { public: - class MBFIWrapper; - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, @@ -132,28 +131,6 @@ MachineLoopInfo *MLI; LivePhysRegs LiveRegs; - public: - /// This class keeps track of branch frequencies of newly created - /// blocks and tail-merged blocks. - class MBFIWrapper { - public: - MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} - - BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; - void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); - raw_ostream &printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const; - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const; - void view(const Twine &Name, bool isSimple = true); - uint64_t getEntryFreq() const; - const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } - - private: - const MachineBlockFrequencyInfo &MBFI; - DenseMap MergedBBFreq; - }; - private: MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -129,7 +129,7 @@ // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); - BranchFolder::MBFIWrapper MBBFreqInfo( + MBFIWrapper MBBFreqInfo( getAnalysis()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, getAnalysis(), @@ -501,42 +501,6 @@ #endif } -BlockFrequency -BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { - auto I = MergedBBFreq.find(MBB); - - if (I != MergedBBFreq.end()) - return I->second; - - return MBFI.getBlockFreq(MBB); -} - -void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, - BlockFrequency F) { - MergedBBFreq[MBB] = F; -} - -raw_ostream & -BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, - const MachineBasicBlock *MBB) const { - return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); -} - -raw_ostream & -BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS, - const BlockFrequency Freq) const { - return MBFI.printBlockFreq(OS, Freq); -} - -void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) { - MBFI.view(Name, isSimple); -} - -uint64_t -BranchFolder::MBFIWrapper::getEntryFreq() const { - return MBFI.getEntryFreq(); -} - /// CountTerminators - Count the number of terminators in the given /// block and set I to the position of the first non-terminator, if there /// is one, or MBB->end() otherwise. @@ -591,7 +555,7 @@ MachineBasicBlock *PredBB, DenseMap &EHScopeMembership, bool AfterPlacement, - BranchFolder::MBFIWrapper &MBBFreqInfo, + MBFIWrapper &MBBFreqInfo, ProfileSummaryInfo *PSI) { // It is never profitable to tail-merge blocks from two different EH scopes. if (!EHScopeMembership.empty()) { @@ -691,8 +655,8 @@ MachineFunction *MF = MBB1->getParent(); bool OptForSize = MF->getFunction().hasOptSize() || - (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && - llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); + (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo)); return EffectiveTailLen >= 2 && OptForSize && (FullBlockTail1 || FullBlockTail2); } @@ -1547,7 +1511,7 @@ bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); + llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo); if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -99,6 +99,7 @@ MachineVerifier.cpp ModuloSchedule.cpp PatchableFunction.cpp + MBFIWrapper.cpp MIRPrinter.cpp MIRPrintingPass.cpp MacroFusion.cpp diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -447,7 +447,7 @@ TLI = ST.getTargetLowering(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); - BranchFolder::MBFIWrapper MBFI(getAnalysis()); + MBFIWrapper MBFI(getAnalysis()); MBPI = &getAnalysis(); ProfileSummaryInfo *PSI = &getAnalysis().getPSI(); diff --git a/llvm/lib/CodeGen/MBFIWrapper.cpp b/llvm/lib/CodeGen/MBFIWrapper.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/MBFIWrapper.cpp @@ -0,0 +1,49 @@ +//===- MBFIWrapper.cpp - MachineBlockFrequencyInfo wrapper ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class keeps track of branch frequencies of newly created blocks and +// transformed blocks. Used by the TailDuplication and MachineBlockPlacement. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MBFIWrapper.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" + +using namespace llvm; + +BlockFrequency MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { + auto I = MergedBBFreq.find(MBB); + + if (I != MergedBBFreq.end()) + return I->second; + + return MBFI.getBlockFreq(MBB); +} + +void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, + BlockFrequency F) { + MergedBBFreq[MBB] = F; +} + +raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const { + return MBFI.printBlockFreq(OS, getBlockFreq(MBB)); +} + +raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const { + return MBFI.printBlockFreq(OS, Freq); +} + +void MBFIWrapper::view(const Twine &Name, bool isSimple) { + MBFI.view(Name, isSimple); +} + +uint64_t MBFIWrapper::getEntryFreq() const { + return MBFI.getEntryFreq(); +} diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -346,7 +346,7 @@ const MachineBranchProbabilityInfo *MBPI; /// A handle to the function-wide block frequency pass. - std::unique_ptr MBFI; + std::unique_ptr MBFI; /// A handle to the loop info. MachineLoopInfo *MLI; @@ -2082,8 +2082,7 @@ // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. bool OptForSize = F->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(L.getHeader(), PSI, - &MBFI->getMBFI()); + llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get()); if (OptForSize) return L.getHeader(); @@ -2841,7 +2840,7 @@ continue; // If the global profiles indicates so, don't align it. - if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && + if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) && !TLI->alignLoopsWithOptSize()) continue; @@ -3046,7 +3045,7 @@ F = &MF; MBPI = &getAnalysis(); - MBFI = std::make_unique( + MBFI = std::make_unique( getAnalysis()); MLI = &getAnalysis(); TII = MF.getSubtarget().getInstrInfo(); @@ -3088,7 +3087,7 @@ if (OptForSize) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, + TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp --- a/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -33,6 +34,13 @@ return Count && PSI->isColdCount(*Count); } +bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isColdCount(*Count); +} + /// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. static bool isHotBlockNthPercentile(int PercentileCutoff, const MachineBasicBlock *MBB, @@ -42,6 +50,14 @@ return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); } +static bool isHotBlockNthPercentile(int PercentileCutoff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); +} + /// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for /// MachineFunction. bool isFunctionColdInCallGraph( @@ -95,6 +111,11 @@ const MachineBlockFrequencyInfo *MBFI) { return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); } + static bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI); + } static bool isHotBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, @@ -102,6 +123,13 @@ return machine_size_opts_detail::isHotBlockNthPercentile( CutOff, MBB, PSI, MBFI); } + static bool isHotBlockNthPercentile(int CutOff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isHotBlockNthPercentile( + CutOff, BlockFreq, PSI, MBFI); + } }; } // end anonymous namespace @@ -120,3 +148,14 @@ return shouldOptimizeForSizeImpl( MBB, PSI, MBFI, QueryType); } + +bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIW, + PGSOQueryType QueryType) { + if (!MBB || !PSI || !MBFIW) + return false; + BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB); + return shouldOptimizeForSizeImpl( + BlockFreq, PSI, &MBFIW->getMBFI(), QueryType); +} diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -31,6 +31,7 @@ class TailDuplicateBase : public MachineFunctionPass { TailDuplicator Duplicator; + std::unique_ptr MBFIW; bool PreRegAlloc; public: TailDuplicateBase(char &PassID, bool PreRegAlloc) @@ -88,7 +89,10 @@ auto *MBFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis().getBFI() : nullptr; - Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); + if (MBFI) + MBFIW = std::make_unique(*MBFI); + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI, + /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -80,7 +80,7 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, - const MachineBlockFrequencyInfo *MBFIin, + MBFIWrapper *MBFIin, ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -846,6 +846,72 @@ ret void } +; This triggers a situation where a new block (bb4 is split) is created and then +; would be passed to the PGSO interface llvm::shouldOptimizeForSize(). +@GV = global i32 0 +define void @bfi_new_block_pgso(i32 %c) nounwind { +; CHECK-LABEL: bfi_new_block_pgso: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB14_4 +; CHECK-NEXT: # %bb.1: # %bb1 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: cmpl $16, %edi +; CHECK-NEXT: je .LBB14_6 +; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: cmpl $17, %edi +; CHECK-NEXT: je .LBB14_7 +; CHECK-NEXT: # %bb.3: # %bb4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: jmp tail_call_me # TAILCALL +; CHECK-NEXT: .LBB14_4: # %bb5 +; CHECK-NEXT: cmpl $128, %edi +; CHECK-NEXT: jne .LBB14_8 +; CHECK-NEXT: # %bb.5: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB14_6: # %bb3 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: .LBB14_7: # %bb4 +; CHECK-NEXT: callq func +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .LBB14_8: # %bb6 +; CHECK-NEXT: jmp tail_call_me # TAILCALL +entry: + %0 = icmp eq i32 %c, 0 + br i1 %0, label %bb5, label %bb1 + +bb1: + switch i32 %c, label %bb4 [ + i32 16, label %bb3 + i32 17, label %bb2 + ] + +bb2: + call void @func() + br label %bb4 + +bb3: + store i32 0, i32* @GV + call void @func() + br label %bb4 + +bb4: + tail call void @tail_call_me() + br label %return + +bb5: + switch i32 %c, label %bb6 [ + i32 128, label %return + ] + +bb6: + tail call void @tail_call_me() + br label %return + +return: + ret void +} + !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}