diff --git a/llvm/include/llvm/Analysis/EHUtils.h b/llvm/include/llvm/Analysis/EHUtils.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/EHUtils.h @@ -0,0 +1,90 @@ +//===-- Analysis/EHUtils.h - Exception handling related utils ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#ifndef LLVM_ANALYSIS_EHUTILS_H +#define LLVM_ANALYSIS_EHUTILS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" + +namespace llvm { + +/// Fill \p EHBlocks with the blocks of \p F only reachable via EH paths. +template +static void computeEHOnlyBlocks(FunctionT &F, DenseSet &EHBlocks) { + // A block is Unknown if it's not reachable from anywhere, + // EH if it's only reachable from start blocks via some path through EH pads, + // NonEH if it's reachable from non-EH blocks as well. + enum Status { Unknown = 0, EH = 1, NonEH = 2 }; + DenseSet WorkList; + DenseMap Statuses; + + auto GetStatus = [&](BlockT *BB) { + if (Statuses.find(BB) != Statuses.end()) + return Statuses[BB]; + else + return Unknown; + }; + + auto CheckPredecessors = [&](BlockT *BB, Status Stat) { + for (auto *PredBB : AccessorTraits::getPredecessors(BB)) { + Status PredStatus = GetStatus(PredBB); + // If a predecessor's status has gone above the current block's, + // we update the current block's status. + if (PredStatus > Stat) + Stat = PredStatus; + } + return Stat; + }; + + auto AddSuccesors = [&](BlockT *BB) { + for (auto *SuccBB : AccessorTraits::getSuccessors(BB)) { + if (!SuccBB->isEHPad()) + WorkList.insert(SuccBB); + } + }; + + // Seed the worklist with the successors of the start block and of EH pads. 
+ BlockT *StartBlock = &F.front(); + Statuses[StartBlock] = NonEH; + AddSuccesors(StartBlock); + + for (auto &BB : F) { + if (BB.isEHPad()) { + AddSuccesors(&BB); + Statuses[&BB] = EH; + } + } + + // Iterate to a fixed point; a block's status can only move upwards. + while (!WorkList.empty()) { + auto *BB = *WorkList.begin(); + WorkList.erase(BB); + + Status OldStatus = GetStatus(BB); + + // Take the highest status among the block and its + // predecessors. + Status NewStatus = CheckPredecessors(BB, OldStatus); + + // Requeue successors only when the status changed. + bool Changed = OldStatus != NewStatus; + if (Changed) { + AddSuccesors(BB); + Statuses[BB] = NewStatus; + } + } + + EHBlocks.clear(); + for (auto Entry : Statuses) { + if (Entry.second == EH) + EHBlocks.insert(Entry.first); + } +} +} // namespace llvm + +#endif // LLVM_ANALYSIS_EHUTILS_H diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp --- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -24,6 +24,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -83,75 +84,21 @@ } // end anonymous namespace /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable -/// only by EH pad as cold. This will help mark EH pads statically cold instead -/// of relying on profile data. -static void -setDescendantEHBlocksCold(SmallVectorImpl &EHBlocks, - MachineFunction &MF) { - MachineBasicBlock *StartBlock = &MF.front(); - // A block can be unknown if its not reachable from anywhere - // EH if its only reachable from start blocks via some path through EH pads - // NonEH if it's reachable from Non EH blocks as well. 
- enum Status { Unknown = 0, EH = 1, NonEH = 2 }; - DenseSet WorkList; - DenseMap Statuses; - - auto getStatus = [&](MachineBasicBlock *MBB) { - if (Statuses.find(MBB) != Statuses.end()) - return Statuses[MBB]; - else - return Unknown; - }; - - auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) { - for (auto *PredMBB : MBB->predecessors()) { - Status PredStatus = getStatus(PredMBB); - // If status of predecessor block has gone above current block - // we update current blocks status. - if (PredStatus > Stat) - Stat = PredStatus; +/// only via EH pads as cold. This will help mark EH pads statically cold +/// instead of relying on profile data. +static void setDescendantEHBlocksCold(MachineFunction &MF) { + struct MIRTraits { + static auto getPredecessors(MachineBasicBlock *BB) { + return BB->predecessors(); } - return Stat; - }; - - auto addSuccesors = [&](MachineBasicBlock *MBB) { - for (auto *SuccMBB : MBB->successors()) { - if (!SuccMBB->isEHPad()) - WorkList.insert(SuccMBB); + static auto getSuccessors(MachineBasicBlock *BB) { + return BB->successors(); } }; - - // Insert the successors of start block - // and landing pads successor. - Statuses[StartBlock] = NonEH; - addSuccesors(StartBlock); - for (auto *LP : EHBlocks) { - addSuccesors(LP); - Statuses[LP] = EH; - } - - // Worklist iterative algorithm. - while (!WorkList.empty()) { - auto *MBB = *WorkList.begin(); - WorkList.erase(MBB); - - Status OldStatus = getStatus(MBB); - - // Check on predecessors and check for - // Status update. - Status NewStatus = checkPredecessors(MBB, OldStatus); - - // Did the block status change? 
- bool changed = OldStatus != NewStatus; - if (changed) { - addSuccesors(MBB); - Statuses[MBB] = NewStatus; - } - } - - for (auto Entry : Statuses) { - if (Entry.second == EH) - Entry.first->setSectionID(MBBSectionID::ColdSectionID); + DenseSet EHBlocks; + computeEHOnlyBlocks(MF, EHBlocks); + for (auto Block : EHBlocks) { + Block->setSectionID(MBBSectionID::ColdSectionID); } } @@ -219,7 +166,7 @@ // Split all EH code and it's descendant statically by default. if (SplitAllEHCode) - setDescendantEHBlocksCold(LandingPads, MF); + setDescendantEHBlocksCold(MF); // We only split out eh pads if all of them are cold. else { bool HasHotLandingPads = false; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/EHUtils.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -28,11 +29,12 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include #include using namespace llvm; -#define DEBUG_TYPE "sample-profile-probe" +#define DEBUG_TYPE "pseudo-probe" STATISTIC(ArtificialDbgLine, "Number of probes that have an artificial debug line"); @@ -253,8 +255,14 @@ } void SampleProfileProber::computeProbeIdForBlocks() { + DenseSet KnownColdBlocks; + computeEHOnlyBlocks>(*F, KnownColdBlocks); + // Insert pseudo probes into non-cold blocks only. This will reduce IR size as + // well as the binary size while retaining the profile quality. 
for (auto &BB : *F) { - BlockProbeIds[&BB] = ++LastProbeId; + ++LastProbeId; + if (!KnownColdBlocks.contains(&BB)) + BlockProbeIds[&BB] = LastProbeId; } } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll @@ -0,0 +1,43 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes=pseudo-probe -function-sections -S -o - | FileCheck %s + +;; Check that pseudoprobe intrinsic calls are generated for non-EH blocks only. + +declare i32 @__gxx_personality_v0(...) +declare i32 @llvm.eh.typeid.for(ptr) nounwind +declare ptr @__cxa_begin_catch(ptr) +declare void @__cxa_end_catch() +declare void @bar() + +@_ZTIi = external constant ptr + +define void @foo() uwtable ssp personality ptr @__gxx_personality_v0 { +entry: +; CHECK: call void @llvm.pseudoprobe + invoke void @bar() + to label %ret unwind label %lpad + +ret: +; CHECK: call void @llvm.pseudoprobe + ret void + +lpad: ; preds = %entry +; CHECK-NOT: call void @llvm.pseudoprobe + %exn = landingpad {ptr, i32} + catch ptr @_ZTIi + %eh.exc = extractvalue { ptr, i32 } %exn, 0 + %eh.selector = extractvalue { ptr, i32 } %exn, 1 + %0 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind + %1 = icmp eq i32 %eh.selector, %0 + br i1 %1, label %catch, label %eh.resume + +catch: +; CHECK-NOT: call void @llvm.pseudoprobe + %ignored = call ptr @__cxa_begin_catch(ptr %eh.exc) nounwind + call void @__cxa_end_catch() nounwind + br label %ret + +eh.resume: +; CHECK-NOT: call void @llvm.pseudoprobe + resume { ptr, i32 } %exn +}