diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -0,0 +1,32 @@ +//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H +#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H + +#include "llvm/ADT/STLExtras.h" + +namespace llvm { + +class MachineFunction; +class MachineBasicBlock; + +/// Binary predicate used to order machine basic blocks; returns true when the +/// first block should be placed before the second. +using MachineBasicBlockComparator = + function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>; + +/// Sorts the blocks of \p MF with \p MBBCmp, then sets the section begin/end +/// markers from the assigned section IDs and updates branches so that +/// fallthroughs which existed before the sort are preserved. +void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, + MachineBasicBlockComparator MBBCmp); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -114,6 +114,8 @@ bool getEmitCallSiteInfo(); +bool getEnableMachineFunctionSplitter(); + bool getEnableDebugEntryValues(); bool getForceDwarfFrameSection(); diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -494,7 +494,8 @@ /// Returns true if this function has basic block sections enabled. bool hasBBSections() const { return (BBSectionsType == BasicBlockSection::All || - BBSectionsType == BasicBlockSection::List); + BBSectionsType == BasicBlockSection::List || + BBSectionsType == BasicBlockSection::Preset); } /// Returns true if basic block labels are to be generated for this function. 
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -50,6 +50,10 @@ /// selectively enable basic block sections. MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf); + /// createMachineFunctionSplitterPass - This pass splits machine functions + /// using profile information. + MachineFunctionPass *createMachineFunctionSplitterPass(); + /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. MachineFunctionPass * diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -280,6 +280,7 @@ void initializeMachineDominanceFrontierPass(PassRegistry&); void initializeMachineDominatorTreePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); +void initializeMachineFunctionSplitterPass(PassRegistry &); void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -67,6 +67,9 @@ Labels, // Do not use Basic Block Sections but label basic blocks. This // is useful when associating profile counts from virtual addresses // to basic blocks. + Preset, // Similar to list but the blocks are identified by passes which + // seek to use Basic Block Sections, e.g. MachineFunctionSplitter. + // This option cannot be set via the command line. None // Do not use Basic Block Sections. 
}; @@ -124,10 +127,10 @@ TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), - SupportsDefaultOutlining(false), EmitAddrsig(false), - EmitCallSiteInfo(false), SupportsDebugEntryValues(false), - EnableDebugEntryValues(false), ForceDwarfFrameSection(false), - XRayOmitFunctionIndex(false), + EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), + EmitAddrsig(false), EmitCallSiteInfo(false), + SupportsDebugEntryValues(false), EnableDebugEntryValues(false), + ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} /// DisableFramePointerElim - This returns true if frame pointer elimination @@ -257,6 +260,9 @@ /// Enables the MachineOutliner pass. unsigned EnableMachineOutliner : 1; + /// Enables the MachineFunctionSplitter pass. + unsigned EnableMachineFunctionSplitter : 1; + /// Set if the target supports default outlining behaviour. unsigned SupportsDefaultOutlining : 1; diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp --- a/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -69,6 +69,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -226,9 +227,9 @@ // and "Cold" succeeding all other clusters. // FuncBBClusterInfo represent the cluster information for basic blocks. If this // is empty, it means unique sections for all basic blocks in the function. 
-static bool assignSectionsAndSortBasicBlocks( - MachineFunction &MF, - const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) { +static void +assignSections(MachineFunction &MF, + const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) { assert(MF.hasBBSections() && "BB Sections is not set for function."); // This variable stores the section ID of the cluster containing eh_pads (if // all eh_pads are one cluster). If more than one cluster contain eh_pads, we @@ -271,12 +272,51 @@ for (auto &MBB : MF) if (MBB.isEHPad()) MBB.setSectionID(EHPadsSectionID.getValue()); +} +void llvm::sortBasicBlocksAndUpdateBranches( + MachineFunction &MF, MachineBasicBlockComparator MBBCmp) { SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs( MF.getNumBlockIDs()); for (auto &MBB : MF) PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough(); + MF.sort(MBBCmp); + + // Set IsBeginSection and IsEndSection according to the assigned section IDs. + MF.assignBeginEndSections(); + + // After reordering basic blocks, we must update basic block branches to + // insert explicit fallthrough branches when required and optimize branches + // when possible. + updateBranches(MF, PreLayoutFallThroughs); +} + +bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { + auto BBSectionsType = MF.getTarget().getBBSectionsType(); + assert(BBSectionsType != BasicBlockSection::None && + "BB Sections not enabled!"); + // Renumber blocks before sorting them for basic block sections. This is + // useful during sorting, basic blocks in the same section will retain the + // default order. This renumbering should also be done for basic block + // labels to match the profiles with the correct blocks. 
+ MF.RenumberBlocks(); + + if (BBSectionsType == BasicBlockSection::Labels) { + MF.setBBSectionsType(BBSectionsType); + MF.createBBLabels(); + return true; + } + + std::vector> FuncBBClusterInfo; + if (BBSectionsType == BasicBlockSection::List && + !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + FuncBBClusterInfo)) + return true; + MF.setBBSectionsType(BBSectionsType); + MF.createBBLabels(); + assignSections(MF, FuncBBClusterInfo); + // We make sure that the cluster including the entry basic block precedes all // other clusters. auto EntryBBSectionID = MF.front().getSectionID(); @@ -300,7 +340,8 @@ // contiguous and ordered accordingly. Furthermore, clusters are ordered in // increasing order of their section IDs, with the exception and the // cold section placed at the end of the function. - MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto Comparator = [&](const MachineBasicBlock &X, + const MachineBasicBlock &Y) { auto XSectionID = X.getSectionID(); auto YSectionID = Y.getSectionID(); if (XSectionID != YSectionID) @@ -311,43 +352,9 @@ return FuncBBClusterInfo[X.getNumber()]->PositionInCluster < FuncBBClusterInfo[Y.getNumber()]->PositionInCluster; return X.getNumber() < Y.getNumber(); - }); - - // Set IsBeginSection and IsEndSection according to the assigned section IDs. - MF.assignBeginEndSections(); - - // After reordering basic blocks, we must update basic block branches to - // insert explicit fallthrough branches when required and optimize branches - // when possible. - updateBranches(MF, PreLayoutFallThroughs); - - return true; -} - -bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { - auto BBSectionsType = MF.getTarget().getBBSectionsType(); - assert(BBSectionsType != BasicBlockSection::None && - "BB Sections not enabled!"); - // Renumber blocks before sorting them for basic block sections. This is - // useful during sorting, basic blocks in the same section will retain the - // default order. 
This renumbering should also be done for basic block - // labels to match the profiles with the correct blocks. - MF.RenumberBlocks(); - - if (BBSectionsType == BasicBlockSection::Labels) { - MF.setBBSectionsType(BBSectionsType); - MF.createBBLabels(); - return true; - } + }; - std::vector> FuncBBClusterInfo; - if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, - FuncBBClusterInfo)) - return true; - MF.setBBSectionsType(BBSectionsType); - MF.createBBLabels(); - assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo); + sortBasicBlocksAndUpdateBranches(MF, Comparator); return true; } diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -82,6 +82,7 @@ MachineFunction.cpp MachineFunctionPass.cpp MachineFunctionPrinterPass.cpp + MachineFunctionSplitter.cpp MachineInstrBundle.cpp MachineInstr.cpp MachineLICM.cpp diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -84,6 +84,7 @@ CGOPT(bool, EnableStackSizeSection) CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) +CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) @@ -400,6 +401,13 @@ cl::init(false)); CGBINDOPT(EnableDebugEntryValues); + static cl::opt EnableMachineFunctionSplitter( + "split-machine-functions", + cl::desc("Split out cold basic blocks from machine functions based on " + "profile information"), + cl::init(false)); + CGBINDOPT(EnableMachineFunctionSplitter); + static cl::opt ForceDwarfFrameSection( "force-dwarf-frame-section", cl::desc("Always emit a debug frame section."), cl::init(false)); @@ -472,6 +480,7 @@ Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; Options.ExceptionModel = 
getExceptionModel(); Options.EmitStackSizeSection = getEnableStackSizeSection(); + Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter(); Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -0,0 +1,149 @@ +//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Uses profile information to split out cold blocks. +// +// This pass splits out cold machine basic blocks from the parent function. This +// implementation leverages the basic block section framework. Blocks marked +// cold by this pass are grouped together in a separate section prefixed with +// ".text.unlikely.*". The linker can then group these together as a cold +// section. The split part of the function is a contiguous region identified by +// the symbol "foo.cold". Grouping all cold blocks across functions together +// decreases fragmentation and improves icache and itlb utilization. Note that +// the overall changes to the binary size are negligible; only a small number of +// additional jump instructions may be introduced. 
+// +// For the original RFC of this pass please see +// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/BasicBlockSectionUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<unsigned> + PercentileCutoff("mfs-psi-cutoff", + cl::desc("Percentile profile summary cutoff used to " + "determine cold blocks. Unused if set to zero."), + cl::init(0), cl::Hidden); + +static cl::opt<unsigned> ColdCountThreshold( + "mfs-count-threshold", + cl::desc( + "Minimum number of times a block must be executed to be retained."), + cl::init(1), cl::Hidden); + +namespace { + +class MachineFunctionSplitter : public MachineFunctionPass { +public: + static char ID; + MachineFunctionSplitter() : MachineFunctionPass(ID) { + initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Machine Function Splitter Transformation"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +/// Returns true if \p MBB should be placed in the cold section: when no +/// profile count is available for it, when -mfs-psi-cutoff is non-zero and +/// \p PSI reports the count as cold at that percentile, or otherwise when the +/// count is below -mfs-count-threshold. +static bool isColdBlock(const MachineBasicBlock &MBB, + const MachineBlockFrequencyInfo *MBFI, + ProfileSummaryInfo *PSI) { + Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); + if (!Count.hasValue()) { + return true; + } + + if (PercentileCutoff > 0) { + return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); + } + return (*Count < ColdCountThreshold); +} + +bool 
MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { + // FIXME: We only target functions with profile data. Static information may + // also be considered but we don't see performance improvements yet. + if (!MF.getFunction().hasProfileData()) + return false; + + // FIXME: We don't split functions where a section attribute has been set + // since the split part may not be placed in a contiguous region. It may also + // be more beneficial to augment the linker to ensure contiguous layout of + // split functions within the same section as specified by the attribute. + if (!MF.getFunction().getSection().empty()) + return false; + + // We don't want to proceed further for cold functions + // or functions of unknown hotness. Lukewarm functions have no prefix. + Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); + if (SectionPrefix.hasValue() && + (SectionPrefix.getValue().equals(".unlikely") || + SectionPrefix.getValue().equals(".unknown"))) { + return false; + } + + MF.RenumberBlocks(); + MF.setBBSectionsType(BasicBlockSection::Preset); + auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + + for (auto &MBB : MF) { + // We retain the entry block and conservatively keep all landing pad blocks + // as part of the original function. 
+ if (&MBB == &MF.front() || MBB.isEHPad()) + continue; + if (isColdBlock(MBB, MBFI, PSI)) { + MBB.setSectionID(MBBSectionID::ColdSectionID); + } + } + + auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { + return X.getSectionID().Type < Y.getSectionID().Type; + }; + llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); + + return true; +} + +void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); +} + +char MachineFunctionSplitter::ID = 0; +INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", + "Split machine functions using profile information", false, + false) + +MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { + return new MachineFunctionSplitter(); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -212,6 +212,13 @@ cl::desc("Stop compilation before a specific pass"), cl::value_desc("pass-name"), cl::init(""), cl::Hidden); +/// Enable the machine function splitter pass. +/// FIXME: Remove this once clang option for this feature has been added. +static cl::opt<bool> EnableMachineFunctionSplitter( + "enable-split-machine-functions", cl::Hidden, + cl::desc("Split out cold blocks from machine functions based on profile " + "information.")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -1014,8 +1021,14 @@ addPass(createMachineOutlinerPass(RunOnAllFunctions)); } - if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) + // Machine function splitter uses the basic block sections feature. Both + // cannot be enabled at the same time. 
+ if (TM->Options.EnableMachineFunctionSplitter || + EnableMachineFunctionSplitter) { + addPass(createMachineFunctionSplitterPass()); + } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + } // Add passes that directly emit MI after all other MI passes. addPreEmitPass2(); diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll @@ -0,0 +1,218 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefix=MFS-DEFAULTS +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2 + +define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 { +;; Check that cold block is moved to .text.unlikely. +; MFS-DEFAULTS-LABEL: foo1 +; MFS-DEFAULTS: .section .text.unlikely.foo1 +; MFS-DEFAULTS-NEXT: foo1.cold: +; MFS-DEFAULTS-NOT: callq bar +; MFS-DEFAULTS-NEXT: callq baz + br i1 %0, label %2, label %4, !prof !17 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + +define void @foo2(i1 zeroext %0) nounwind !prof !23 !section_prefix !16 { +;; Check that function marked unlikely is not split. 
+; MFS-DEFAULTS-LABEL: foo2 +; MFS-DEFAULTS-NOT: foo2.cold: + br i1 %0, label %2, label %4, !prof !17 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + +define void @foo3(i1 zeroext %0) nounwind !section_prefix !15 { +;; Check that function without profile data is not split. +; MFS-DEFAULTS-LABEL: foo3 +; MFS-DEFAULTS-NOT: foo3.cold: + br i1 %0, label %2, label %4 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + +define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 { +;; Check that count threshold works. +; MFS-OPTS1-LABEL: foo4 +; MFS-OPTS1: .section .text.unlikely.foo4 +; MFS-OPTS1-NEXT: foo4.cold: +; MFS-OPTS1-NOT: callq bar +; MFS-OPTS1-NOT: callq baz +; MFS-OPTS1-NEXT: callq bam + br i1 %0, label %3, label %7, !prof !18 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10, !prof !19 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = tail call i32 @qux() + ret void +} + +define void @foo5(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 { +;; Check that profile summary info cutoff works. 
+; MFS-OPTS2-LABEL: foo5 +; MFS-OPTS2: .section .text.unlikely.foo5 +; MFS-OPTS2-NEXT: foo5.cold: +; MFS-OPTS2-NOT: callq bar +; MFS-OPTS2-NOT: callq baz +; MFS-OPTS2-NEXT: callq bam + br i1 %0, label %3, label %7, !prof !21 + +3: + %4 = call i32 @bar() + br label %7 + +5: + %6 = call i32 @baz() + br label %7 + +7: + br i1 %1, label %8, label %10, !prof !22 + +8: + %9 = call i32 @bam() + br label %12 + +10: + %11 = call i32 @baz() + br label %12 + +12: + %13 = call i32 @qux() + ret void +} + +define void @foo6(i1 zeroext %0) nounwind section "nosplit" !prof !14 { +;; Check that function in excluded section is not split. +; MFS-DEFAULTS-LABEL: foo6 +; MFS-DEFAULTS-NOT: foo6.cold: + br i1 %0, label %2, label %4, !prof !17 + +2: ; preds = %1 + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret void +} + +define i32 @foo7(i1 zeroext %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !14 { +;; Check that cold ehpads are not split out. +; MFS-DEFAULTS-LABEL: foo7 +; MFS-DEFAULTS: .section .text.unlikely.foo7,"ax",@progbits +; MFS-DEFAULTS-NEXT: foo7.cold: +; MFS-DEFAULTS-NOT: callq _Unwind_Resume +; MFS-DEFAULTS: callq baz +entry: + invoke void @_Z1fv() + to label %try.cont unwind label %lpad + +lpad: + %1 = landingpad { i8*, i32 } + cleanup + catch i8* bitcast (i8** @_ZTIi to i8*) + resume { i8*, i32 } %1 + +try.cont: + br i1 %0, label %2, label %4, !prof !17 + +2: ; preds = try.cont + %3 = call i32 @bar() + br label %6 + +4: ; preds = %1 + %5 = call i32 @baz() + br label %6 + +6: ; preds = %4, %2 + %7 = tail call i32 @qux() + ret i32 %7 +} + +declare i32 @bar() +declare i32 @baz() +declare i32 @bam() +declare i32 @qux() +declare void @_Z1fv() +declare i32 @__gxx_personality_v0(...) 
+ +@_ZTIi = external constant i8* + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 5} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999900, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 7000} +!15 = !{!"function_section_prefix", !".hot"} +!16 = !{!"function_section_prefix", !".unlikely"} +!17 = !{!"branch_weights", i32 7000, i32 0} +!18 = !{!"branch_weights", i32 3000, i32 4000} +!19 = !{!"branch_weights", i32 1000, i32 6000} +!20 = !{!"function_entry_count", i64 10000} +!21 = !{!"branch_weights", i32 6000, i32 4000} +!22 = !{!"branch_weights", i32 80, i32 9920} +!23 = !{!"function_entry_count", i64 7}