Changeset View
Standalone View
lib/Transforms/IPO/PruneEH.cpp
Show All 16 Lines | |||||
#include "llvm/Transforms/IPO.h" | #include "llvm/Transforms/IPO.h" | ||||
#include "llvm/ADT/SmallPtrSet.h" | #include "llvm/ADT/SmallPtrSet.h" | ||||
#include "llvm/ADT/SmallVector.h" | #include "llvm/ADT/SmallVector.h" | ||||
#include "llvm/ADT/Statistic.h" | #include "llvm/ADT/Statistic.h" | ||||
#include "llvm/Support/raw_ostream.h" | #include "llvm/Support/raw_ostream.h" | ||||
#include "llvm/Analysis/CallGraph.h" | #include "llvm/Analysis/CallGraph.h" | ||||
#include "llvm/Analysis/CallGraphSCCPass.h" | #include "llvm/Analysis/CallGraphSCCPass.h" | ||||
#include "llvm/Analysis/LibCallSemantics.h" | #include "llvm/Analysis/LibCallSemantics.h" | ||||
#include "llvm/Analysis/TargetLibraryInfo.h" | |||||
#include "llvm/IR/CFG.h" | #include "llvm/IR/CFG.h" | ||||
#include "llvm/IR/Constants.h" | #include "llvm/IR/Constants.h" | ||||
#include "llvm/IR/Function.h" | #include "llvm/IR/Function.h" | ||||
#include "llvm/IR/InlineAsm.h" | #include "llvm/IR/InlineAsm.h" | ||||
#include "llvm/IR/Instructions.h" | #include "llvm/IR/Instructions.h" | ||||
#include "llvm/IR/IntrinsicInst.h" | #include "llvm/IR/IntrinsicInst.h" | ||||
#include "llvm/IR/LLVMContext.h" | #include "llvm/IR/LLVMContext.h" | ||||
#include <algorithm> | #include <algorithm> | ||||
using namespace llvm; | using namespace llvm; | ||||
#define DEBUG_TYPE "prune-eh" | #define DEBUG_TYPE "prune-eh" | ||||
STATISTIC(NumRemoved, "Number of invokes removed"); | STATISTIC(NumRemoved, "Number of invokes removed"); | ||||
STATISTIC(NumUnreach, "Number of noreturn calls optimized"); | STATISTIC(NumUnreach, "Number of noreturn calls optimized"); | ||||
namespace { | namespace { | ||||
struct PruneEH : public CallGraphSCCPass { | struct PruneEH : public CallGraphSCCPass { | ||||
static char ID; // Pass identification, replacement for typeid | static char ID; // Pass identification, replacement for typeid | ||||
PruneEH() : CallGraphSCCPass(ID) { | TargetLibraryInfo *TLI; | ||||
bool MarkNoInlineInEHR; | |||||
PruneEH() : CallGraphSCCPass(ID), MarkNoInlineInEHR(false) { | |||||
majnemer: Please do not have a space between the variable and the parenthesis. | |||||
initializePruneEHPass(*PassRegistry::getPassRegistry()); | initializePruneEHPass(*PassRegistry::getPassRegistry()); | ||||
} | } | ||||
// runOnSCC - Analyze the SCC, performing the transformation if possible. | // runOnSCC - Analyze the SCC, performing the transformation if possible. | ||||
bool runOnSCC(CallGraphSCC &SCC) override; | bool runOnSCC(CallGraphSCC &SCC) override; | ||||
bool doFinalization(CallGraph &CG) override; | |||||
void getAnalysisUsage(AnalysisUsage &AU) const override; | |||||
bool SimplifyFunction(Function *F); | bool SimplifyFunction(Function *F); | ||||
void DeleteBasicBlock(BasicBlock *BB); | void DeleteBasicBlock(BasicBlock *BB); | ||||
void AddNoinlineForCSInEH(Function *F); | |||||
void AddNoInlineForEHAllocUsers(Value *EHAlloc, CallInst *EHAllocCall); | |||||
}; | }; | ||||
} | } | ||||
char PruneEH::ID = 0; | char PruneEH::ID = 0; | ||||
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", | INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", | ||||
"Remove unused exception handling info", false, false) | "Remove unused exception handling info", false, false) | ||||
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) | ||||
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) | |||||
INITIALIZE_PASS_END(PruneEH, "prune-eh", | INITIALIZE_PASS_END(PruneEH, "prune-eh", | ||||
"Remove unused exception handling info", false, false) | "Remove unused exception handling info", false, false) | ||||
Pass *llvm::createPruneEHPass() { return new PruneEH(); } | Pass *llvm::createPruneEHPass() { return new PruneEH(); } | ||||
void PruneEH::getAnalysisUsage(AnalysisUsage &AU) const { | |||||
AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||||
CallGraphSCCPass::getAnalysisUsage(AU); | |||||
} | |||||
bool PruneEH::doFinalization(CallGraph &CG) { | |||||
hfinkelUnsubmitted As I understand it, you want this flag to apply per-module, not per run over the call graph. So you need to do this in 'bool doFinalization(Module &)', not in 'doFinalization(CallGraph &CG)' which is called at the end of the call graph's runOnModule function. If there's no functional difference, then you don't need the flag at all. hfinkel: As I understand it, you want this flag to apply per-module, not per run over the call graph. So… | |||||
junbumlAuthorUnsubmitted As you mention, doFinalization(CallGraph &CG) is called at the end of runOnModule() of CGPassManager. Since doFinalization(CallGraph &CG) is called after all SCCs in the module has been processed, I believe this is called once per module. junbuml: As you mention, doFinalization(CallGraph &CG) is called at the end of runOnModule() of… | |||||
Indeed, I agree. hfinkel: Indeed, I agree. | |||||
MarkNoInlineInEHR = false; | |||||
return false; | |||||
} | |||||
bool PruneEH::runOnSCC(CallGraphSCC &SCC) { | bool PruneEH::runOnSCC(CallGraphSCC &SCC) { | ||||
SmallPtrSet<CallGraphNode *, 8> SCCNodes; | SmallPtrSet<CallGraphNode *, 8> SCCNodes; | ||||
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); | CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); | ||||
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); | |||||
bool MadeChange = false; | bool MadeChange = false; | ||||
// Fill SCCNodes with the elements of the SCC. Used for quickly | // Fill SCCNodes with the elements of the SCC. Used for quickly | ||||
// looking up whether a given CallGraphNode is in this SCC. | // looking up whether a given CallGraphNode is in this SCC. | ||||
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) | for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) | ||||
SCCNodes.insert(*I); | SCCNodes.insert(*I); | ||||
// First pass, scan all of the functions in the SCC, simplifying them | // First pass, scan all of the functions in the SCC, simplifying them | ||||
▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines | for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { | ||||
F->getContext(), AttributeSet::FunctionIndex, NewAttributes); | F->getContext(), AttributeSet::FunctionIndex, NewAttributes); | ||||
if (PAL != NPAL) { | if (PAL != NPAL) { | ||||
MadeChange = true; | MadeChange = true; | ||||
F->addAttributes(AttributeSet::FunctionIndex, NPAL); | F->addAttributes(AttributeSet::FunctionIndex, NPAL); | ||||
} | } | ||||
} | } | ||||
// If the SCC unwinds, find CallSites in exception handling regions, | |||||
unwind -> unwinds hfinkel: unwind -> unwinds | |||||
// and mark them with the NoInline attribute. Note that this need to be | |||||
mark the -> mark them with the hfinkel: mark the -> mark them with the | |||||
// performed only once per module. | |||||
if (!MarkNoInlineInEHR && SCCMightUnwind) | |||||
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) | |||||
You should MarkNoInlineInEHR = true; here instead of in AddNoinlineForCSInEH (no need to do the CGSCC traversal here to call functions that don't do anything). Also, to maintain the general pass model, I think you need to override the pass's 'virtual bool doFinalization(Module &)' function and reset the MarkNoInlineInEHR flag to false. hfinkel: You should MarkNoInlineInEHR = true; here instead of in AddNoinlineForCSInEH (no need to do the… | |||||
if (Function *F = (*I)->getFunction()) { | |||||
AddNoinlineForCSInEH(F); | |||||
MarkNoInlineInEHR = true; | |||||
break; | |||||
Why do you 'break;' here? If you do that, you'll only visit the first function in each call graph. I think you want to have: if (!MarkColdInEHR && SCCMightUnwind) { MarkColdInEHR = true; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (!F) continue; AddColdForCSInEH(F); } } hfinkel: Why do you 'break;' here? If you do that, you'll only visit the first function in each call… | |||||
As I comment above, AddColdForCSInEH() does not need to be performed multiple times for each function. Since it handle all CallSites for __cxa_throw() in a module, it should be called only once per module. That's also why I have MarkColdInEHR . junbuml: As I comment above, AddColdForCSInEH() does not need to be performed multiple times for each… | |||||
OIC, you don't really want to iterate over the call graph here at all; you're just trying to get the first function, any function, so that you can call getParent() to get the module. Don't do that, just call CG.getModule(). hfinkel: OIC, you don't really want to iterate over the call graph here at all; you're just trying to… | |||||
} | |||||
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { | for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { | ||||
// Convert any invoke instructions to non-throwing functions in this node | // Convert any invoke instructions to non-throwing functions in this node | ||||
// into call instructions with a branch. This makes the exception blocks | // into call instructions with a branch. This makes the exception blocks | ||||
// dead. | // dead. | ||||
if (Function *F = (*I)->getFunction()) | if (Function *F = (*I)->getFunction()) | ||||
MadeChange |= SimplifyFunction(F); | MadeChange |= SimplifyFunction(F); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines | void PruneEH::DeleteBasicBlock(BasicBlock *BB) { | ||||
// Get the list of successors of this block. | // Get the list of successors of this block. | ||||
std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); | std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB)); | ||||
for (unsigned i = 0, e = Succs.size(); i != e; ++i) | for (unsigned i = 0, e = Succs.size(); i != e; ++i) | ||||
Succs[i]->removePredecessor(BB); | Succs[i]->removePredecessor(BB); | ||||
BB->eraseFromParent(); | BB->eraseFromParent(); | ||||
} | } | ||||
/// AddNoInlineForEHAllocUsers - Add the NoInlie attribute if CallSites take the | |||||
majnemerUnsubmitted NoInlie -> NoInline majnemer: `NoInlie` -> `NoInline` | |||||
/// momeory allocated for the exception to be thrown as its first arguement. | |||||
void PruneEH::AddNoInlineForEHAllocUsers(Value *EHAlloc, | |||||
CallInst *EHAllocCall) { | |||||
for (User *AllocU : EHAlloc->users()) { | |||||
if (auto *BCInst = dyn_cast<BitCastInst>(AllocU)) { | |||||
You can use auto *BCInst to avoid repetition. majnemer: You can use `auto *BCInst` to avoid repetition. | |||||
AddNoInlineForEHAllocUsers(BCInst, EHAllocCall); | |||||
} else { | |||||
Please use consistent bracing. majnemer: Please use consistent bracing. | |||||
CallSite CS(AllocU); | |||||
if (CS && !isa<IntrinsicInst>(AllocU) && !CS.arg_empty() && | |||||
CS.getArgument(0)->stripPointerCasts() == EHAllocCall) { | |||||
Function *EHFn = CS.getCalledFunction(); | |||||
if (EHFn && !EHFn->hasFnAttribute(Attribute::NoInline) && | |||||
!EHFn->hasFnAttribute(Attribute::AlwaysInline) && | |||||
!EHFn->hasFnAttribute(Attribute::InlineHint) && | |||||
!CS.hasFnAttr(Attribute::NoInline) && | |||||
!CS.hasFnAttr(Attribute::AlwaysInline)) | |||||
CS.setIsNoInline(); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
/// AddNoinlineForCSInEH - add the NoInline attribute for CallSites | |||||
/// invoked in exception handling context. | |||||
/// FIXME: It might be reasonably to avoid inlining CallSites invoked in | |||||
/// exception handling context so that we can reduce code size blow-up in | |||||
/// EH region as well as indirectly increase inline opportunites for unwinding | |||||
/// functions containing exception handling code. | |||||
void PruneEH::AddNoinlineForCSInEH(Function *F) { | |||||
Function *FnThrowE = | |||||
F->getParent()->getFunction(TLI->getName(LibFunc::cxa_throw)); | |||||
Function *FnEAlloc = F->getParent()->getFunction( | |||||
There's no need to require the bitcast here. What if you actually want an i8* as the exception type? You should just call stripPointerCasts instead. hfinkel: There's no need to require the bitcast here. What if you actually want an i8* as the exception… | |||||
Thanks Hal for the review. junbuml: Thanks Hal for the review.
I refactor this part into a separate function to trace bitcast from… | |||||
TLI->getName(LibFunc::cxa_allocate_exception)); | |||||
if (!FnEAlloc || !FnThrowE || !FnEAlloc->isDeclaration() || | |||||
!FnThrowE->isDeclaration()) | |||||
return; | |||||
// Try to find functions which takes memory allocated specifically for | |||||
// exception to be thrown as its first argument, which must be a constructor | |||||
// or method executed in the context of exception handling. | |||||
// For example, in IR below, the NoInline attribute will be added in CallInst | |||||
// for MyException() as it takes the memory allocated for the exception | |||||
// thrown. | |||||
// | |||||
// %exception = call i8* @__cxa_allocate_exception(i64 1) | |||||
// %0 = bitcast i8* %exception to %class.MyException* | |||||
// call void @MyException(%class.MyException* %0) | |||||
// call void @__cxa_throw(i8* %exception, .. ) | |||||
for (User *FnU : FnThrowE->users()) { | |||||
CallInst *ThrowCall = dyn_cast<CallInst>(FnU); | |||||
majnemerUnsubmitted Likewise, you can use auto * here. majnemer: Likewise, you can use `auto *` here. | |||||
if (!ThrowCall || ThrowCall->isNoBuiltin()) | |||||
continue; | |||||
// Make sure that the first argument of __cxa_throw() is the memory | |||||
// allocated by __cxa_allocate_exception(). | |||||
CallInst *EHAllocCall = dyn_cast<CallInst>(ThrowCall->getArgOperand(0)); | |||||
majnemerUnsubmitted And here. majnemer: And here. | |||||
if (!EHAllocCall || EHAllocCall->isNoBuiltin() || | |||||
EHAllocCall->getCalledFunction() != FnEAlloc) | |||||
Not Done ReplyInline ActionsWhy are you looking for users of cxa_throw? Can we make the assumption that all user of cxa_allocate_exception are essentially cold, and just look for the users of return values of that function? hfinkel: Why are you looking for users of __cxa_throw? Can we make the assumption that all user of… | |||||
Not Done ReplyInline ActionsI'm trying to find all actual calls for __cxa_throw() by looking at the users of FnThrowE where an exception is thrown. Here, I specifically check the exception allocated to be thrown (__cxa_throw()) in throw statement, which should be cold unless the program logic highly rely on throw statement. When a exception handling class is created in throw statement, Please correct me if I miss something. junbuml: I'm trying to find all actual calls for __cxa_throw() by looking at the users of FnThrowE where… | |||||
Not Done ReplyInline ActionsI understand what you're doing, but my question is this: Is the call to cxa_allocate_exception itself cold? If so, perhaps that's all we need to find. Maybe looking for the cxa_throw calls is an unnecessary complication. hfinkel: I understand what you're doing, but my question is this: Is the call to… | |||||
Not Done ReplyInline ActionsI believe cxa_allocate_exception() itself must be cold in almost all normal c++ program. But, I just wanted to make sure the exception allocated is actually used for throwing. If checking cxa_throw() doesn't make it too complicated, I believe there is no harm with it. I can also leave FIXME about it. Please let me know your thought! junbuml: I believe __cxa_allocate_exception() itself must be cold in almost all normal c++ program. But… | |||||
Not Done ReplyInline Actions
Okay. Let me also ask something I should have asked in the very beginning: Why are you doing this here and not in Clang? It seems that, in Clang, it would be simple to have a piece of state that records whether we're in a throw statement or a catch block, and marks the call sites appropriately. hfinkel: > I believe cxa_allocate_exception() itself must be cold in almost all normal c++ program. But… | |||||
Not Done ReplyInline ActionsI just wanted to place it somewhere exception handling specific pass, instead of directly touching inliner. If you believe Clang (ItaniumCXXABI.cpp ?) is better place I can do the same thing in there. junbuml: I just wanted to place it somewhere exception handling specific pass, instead of directly… | |||||
Not Done ReplyInline ActionsThis might be really off base, but here's what I was thinking about last night... We're trying to mark all callsites within throw statements (and catch blocks) as cold (noinline). This seems analogous to me to how we handle setting instruction fast-math flags, where we give the IRBuilder a default state, and change it based on scope information.
This seems like it might be both more general and simpler. hfinkel: This might be really off base, but here's what I was thinking about last night...
We're trying… | |||||
continue; | |||||
AddNoInlineForEHAllocUsers(EHAllocCall, EHAllocCall); | |||||
} | |||||
// FIXME: Similarly, we could add the NoInline attribute for CallSites in | |||||
// catch blocks by traversing blocks reachable from landingpads until EH | |||||
// return points. | |||||
} |
Please do not have a space between the variable and the parenthesis.