diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h --- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h +++ b/llvm/include/llvm/CodeGen/MachineSizeOpts.h @@ -21,6 +21,7 @@ class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineFunction; +class MBFIWrapper; /// Returns true if machine function \p MF is suggested to be size-optimized /// based on the profile. @@ -33,6 +34,12 @@ ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType = PGSOQueryType::Other); +/// Returns true if machine basic block \p MBB is suggested to be size-optimized +/// based on the profile. +bool shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIWrapper, + PGSOQueryType QueryType = PGSOQueryType::Other); } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include #include @@ -42,7 +43,7 @@ const MachineModuleInfo *MMI; MachineRegisterInfo *MRI; MachineFunction *MF; - const MachineBlockFrequencyInfo *MBFI; + MBFIWrapper *MBFI; ProfileSummaryInfo *PSI; bool PreRegAlloc; bool LayoutMode; @@ -69,7 +70,7 @@ /// default implies using the command line value TailDupSize. void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, - const MachineBlockFrequencyInfo *MBFI, + MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize = 0); diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -63,10 +63,9 @@ F, PSI, *BFI); } -template -bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI, +template +bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI, BFIT *BFI, PGSOQueryType QueryType) { - assert(BB); if (!PSI || !BFI || !PSI->hasProfileSummary()) return false; if (ForcePGSO) @@ -81,11 +80,11 @@ if (PGSOColdCodeOnly || (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) { // Even if the working set size isn't large, size-optimize cold code. - return AdapterT::isColdBlock(BB, PSI, BFI); + return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI); } return !AdapterT::isHotBlockNthPercentile( PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf, - BB, PSI, BFI); + BBOrBlockFreq, PSI, BFI); } /// Returns true if function \p F is suggested to be size-optimized based on the diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -655,8 +655,8 @@ MachineFunction *MF = MBB1->getParent(); bool OptForSize = MF->getFunction().hasOptSize() || - (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && - llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); + (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo)); return EffectiveTailLen >= 2 && OptForSize && (FullBlockTail1 || FullBlockTail2); } @@ -1511,7 +1511,7 @@ bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); + llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo); if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -2082,8 +2082,7 @@ // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. bool OptForSize = F->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(L.getHeader(), PSI, - &MBFI->getMBFI()); + llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get()); if (OptForSize) return L.getHeader(); @@ -2841,7 +2840,7 @@ continue; // If the global profiles indicates so, don't align it. - if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && + if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) && !TLI->alignLoopsWithOptSize()) continue; @@ -3088,7 +3087,7 @@ if (OptForSize) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, + TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI, /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp --- a/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -33,6 +34,13 @@ return Count && PSI->isColdCount(*Count); } +bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isColdCount(*Count); +} + /// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. static bool isHotBlockNthPercentile(int PercentileCutoff, const MachineBasicBlock *MBB, @@ -42,6 +50,14 @@ return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); } +static bool isHotBlockNthPercentile(int PercentileCutoff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency()); + return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); +} + /// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for /// MachineFunction. bool isFunctionColdInCallGraph( @@ -95,6 +111,11 @@ const MachineBlockFrequencyInfo *MBFI) { return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); } + static bool isColdBlock(BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI); + } static bool isHotBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI, @@ -102,6 +123,13 @@ return machine_size_opts_detail::isHotBlockNthPercentile( CutOff, MBB, PSI, MBFI); } + static bool isHotBlockNthPercentile(int CutOff, + BlockFrequency BlockFreq, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isHotBlockNthPercentile( + CutOff, BlockFreq, PSI, MBFI); + } }; } // end anonymous namespace @@ -117,6 +145,19 @@ ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType) { + assert(MBB); return shouldOptimizeForSizeImpl( MBB, PSI, MBFI, QueryType); } + +bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + MBFIWrapper *MBFIW, + PGSOQueryType QueryType) { + assert(MBB); + if (!PSI || !MBFIW) + return false; + BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB); + return shouldOptimizeForSizeImpl( + BlockFreq, PSI, &MBFIW->getMBFI(), QueryType); +} diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -31,6 +31,7 @@ class TailDuplicateBase : public MachineFunctionPass { TailDuplicator Duplicator; + std::unique_ptr MBFIW; bool PreRegAlloc; public: TailDuplicateBase(char &PassID, bool PreRegAlloc) @@ -88,7 +89,10 @@ auto *MBFI = (PSI && PSI->hasProfileSummary()) ? &getAnalysis().getBFI() : nullptr; - Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); + if (MBFI) + MBFIW = std::make_unique(*MBFI); + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI, + /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -80,7 +80,7 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, - const MachineBlockFrequencyInfo *MBFIin, + MBFIWrapper *MBFIin, ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -84,6 +84,7 @@ bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, PGSOQueryType QueryType) { + assert(BB); return shouldOptimizeForSizeImpl(BB, PSI, BFI, QueryType); } diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -846,6 +846,72 @@ ret void } +; This triggers a situation where a new block (bb4 is split) is created and then +; would be passed to the PGSO interface llvm::shouldOptimizeForSize(). +@GV = global i32 0 +define void @bfi_new_block_pgso(i32 %c) nounwind { +; CHECK-LABEL: bfi_new_block_pgso: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB14_4 +; CHECK-NEXT: # %bb.1: # %bb1 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: cmpl $16, %edi +; CHECK-NEXT: je .LBB14_6 +; CHECK-NEXT: # %bb.2: # %bb1 +; CHECK-NEXT: cmpl $17, %edi +; CHECK-NEXT: je .LBB14_7 +; CHECK-NEXT: # %bb.3: # %bb4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: jmp tail_call_me # TAILCALL +; CHECK-NEXT: .LBB14_4: # %bb5 +; CHECK-NEXT: cmpl $128, %edi +; CHECK-NEXT: jne .LBB14_8 +; CHECK-NEXT: # %bb.5: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB14_6: # %bb3 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: .LBB14_7: # %bb4 +; CHECK-NEXT: callq func +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .LBB14_8: # %bb6 +; CHECK-NEXT: jmp tail_call_me # TAILCALL +entry: + %0 = icmp eq i32 %c, 0 + br i1 %0, label %bb5, label %bb1 + +bb1: + switch i32 %c, label %bb4 [ + i32 16, label %bb3 + i32 17, label %bb2 + ] + +bb2: + call void @func() + br label %bb4 + +bb3: + store i32 0, i32* @GV + call void @func() + br label %bb4 + +bb4: + tail call void @tail_call_me() + br label %return + +bb5: + switch i32 %c, label %bb6 [ + i32 128, label %return + ] + +bb6: + tail call void @tail_call_me() + br label %return + +return: + ret void +} + !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}