Index: include/llvm/Analysis/TargetLibraryInfo.def =================================================================== --- include/llvm/Analysis/TargetLibraryInfo.def +++ include/llvm/Analysis/TargetLibraryInfo.def @@ -87,6 +87,9 @@ /// float __cospif(float x); TLI_DEFINE_ENUM_INTERNAL(cospif) TLI_DEFINE_STRING_INTERNAL("__cospif") +/// void* __cxa_allocate_exception(size_t thrown_size) throw(); +TLI_DEFINE_ENUM_INTERNAL(cxa_allocate_exception) +TLI_DEFINE_STRING_INTERNAL("__cxa_allocate_exception") /// int __cxa_atexit(void (*f)(void *), void *p, void *d); TLI_DEFINE_ENUM_INTERNAL(cxa_atexit) TLI_DEFINE_STRING_INTERNAL("__cxa_atexit") @@ -100,6 +103,9 @@ /// void __cxa_guard_release(guard_t *guard); TLI_DEFINE_ENUM_INTERNAL(cxa_guard_release) TLI_DEFINE_STRING_INTERNAL("__cxa_guard_release") +/// void __cxa_throw(void *thrown_exception, std::type_info *tinfo, void (*dest)(void *)); +TLI_DEFINE_ENUM_INTERNAL(cxa_throw) +TLI_DEFINE_STRING_INTERNAL("__cxa_throw") /// int __isoc99_scanf (const char *format, ...) TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_scanf) TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf") Index: include/llvm/IR/CallSite.h =================================================================== --- include/llvm/IR/CallSite.h +++ include/llvm/IR/CallSite.h @@ -271,8 +271,16 @@ bool isNoInline() const { CALLSITE_DELEGATE_GETTER(isNoInline()); } - void setIsNoInline(bool Value = true) { - CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); + void setIsNoInline() { + CALLSITE_DELEGATE_SETTER(setIsNoInline()); + } + + /// @brief Return true if the call is cold. + bool isCold() const { + CALLSITE_DELEGATE_GETTER(isCold()); + } + void setIsCold() { + CALLSITE_DELEGATE_SETTER(setIsCold()); } /// @brief Determine if the call does not access memory. 
Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -1588,6 +1588,12 @@ addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline); } + /// \brief Return true if the call is Cold. + bool isCold() const { return hasFnAttr(Attribute::Cold); } + void setIsCold() { + addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + /// \brief Return true if the call can return twice bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); @@ -3396,6 +3402,12 @@ addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline); } + /// \brief Return true if the call is Cold. + bool isCold() const { return hasFnAttr(Attribute::Cold); } + void setIsCold() { + addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + /// \brief Determine if the call does not access memory. bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); Index: lib/Transforms/IPO/PruneEH.cpp =================================================================== --- lib/Transforms/IPO/PruneEH.cpp +++ lib/Transforms/IPO/PruneEH.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -40,15 +41,22 @@ namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(ID) { + TargetLibraryInfo *TLI; + bool MarkColdInEHR; + + PruneEH() : CallGraphSCCPass(ID), MarkColdInEHR(false) { initializePruneEHPass(*PassRegistry::getPassRegistry()); } // runOnSCC - Analyze the SCC, performing the transformation if possible. 
bool runOnSCC(CallGraphSCC &SCC) override; + bool doFinalization(CallGraph &CG) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); + void AddColdForCSInEH(Function *F); + void AddColdForEHAllocUsers(Value *EHAlloc, CallInst *EHAllocCall); }; } @@ -56,15 +64,28 @@ INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) Pass *llvm::createPruneEHPass() { return new PruneEH(); } +void PruneEH::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool PruneEH::doFinalization(CallGraph &CG) { + MarkColdInEHR = false; + return false; +} + bool PruneEH::runOnSCC(CallGraphSCC &SCC) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly @@ -171,6 +192,17 @@ } } + // If the SCC unwinds, find CallSites in exception handling regions, + // and mark them with the NoInline and Cold attributes. Note that this needs + // to be performed only once per module. + if (!MarkColdInEHR && SCCMightUnwind) + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) { + AddColdForCSInEH(F); + MarkColdInEHR = true; + break; + } + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. 
This makes the exception blocks @@ -272,3 +304,77 @@ BB->eraseFromParent(); } + +/// AddColdForEHAllocUsers - add the NoInline and Cold attributes if +/// CallSites take the memory allocated for the exception to be thrown +/// as their first argument. +/// FIXME: We add both NoInline and Cold because the inline cold-threshold +/// is not tuned yet (r200898). As of now, considering that CallSites in +/// exception handling regions are very cold is not unreasonable even without +/// profiling, and avoiding inlining in exception handling region may not have +/// significant impacts on performance unless a program execution logic +/// really depends on exception handling flows. However, when the inline +/// cold-threshold is tuned, we may need to remove NoInline here so that we +/// can allow a trivial constructor to be inlined. +void PruneEH::AddColdForEHAllocUsers(Value *EHAlloc, CallInst *EHAllocCall) { + for (User *AllocU : EHAlloc->users()) { + if (auto *BCInst = dyn_cast<BitCastInst>(AllocU)) { + AddColdForEHAllocUsers(BCInst, EHAllocCall); + } else { + CallSite CS(AllocU); + if (CS && !isa(AllocU) && !CS.arg_empty() && + CS.getArgument(0)->stripPointerCasts() == EHAllocCall) { + if (!CS.hasFnAttr(Attribute::NoInline) && + !CS.hasFnAttr(Attribute::AlwaysInline) && + !CS.hasFnAttr(Attribute::InlineHint) && + !CS.hasFnAttr(Attribute::Cold)) { + CS.setIsNoInline(); + CS.setIsCold(); + } + } + } + } +} + +/// AddColdForCSInEH - it might be reasonable to avoid inlining CallSites +/// invoked in exception handling context so that we can reduce code size +/// blow-up in EH region as well as indirectly increase inline opportunities +/// for unwinding functions containing exception handling code. This function +/// adds the NoInline and Cold attributes for CallSites invoked in exception +/// handling context. 
+void PruneEH::AddColdForCSInEH(Function *F) { + Function *FnThrowE = + F->getParent()->getFunction(TLI->getName(LibFunc::cxa_throw)); + Function *FnEAlloc = F->getParent()->getFunction( + TLI->getName(LibFunc::cxa_allocate_exception)); + + if (!FnEAlloc || !FnThrowE || !FnEAlloc->isDeclaration() || + !FnThrowE->isDeclaration()) + return; + // Try to find functions which take memory allocated specifically for + // exception to be thrown as its first argument, which must be a constructor + // or method executed in the context of exception handling. + // For example, in IR below, the NoInline and Cold attributes will be added in + // CallInst for MyException() as it takes the memory allocated for the + // exception thrown. + // + // %exception = call i8* @__cxa_allocate_exception(i64 1) + // %0 = bitcast i8* %exception to %class.MyException* + // call void @MyException(%class.MyException* %0) + // call void @__cxa_throw(i8* %exception, .. ) + for (User *FnU : FnThrowE->users()) { + auto *ThrowCall = dyn_cast<CallInst>(FnU); + if (!ThrowCall || ThrowCall->isNoBuiltin()) + continue; + // Make sure that the first argument of __cxa_throw() is the memory + // allocated by __cxa_allocate_exception(). + auto *EHAllocCall = dyn_cast<CallInst>(ThrowCall->getArgOperand(0)); + if (!EHAllocCall || EHAllocCall->isNoBuiltin() || + EHAllocCall->getCalledFunction() != FnEAlloc) + continue; + AddColdForEHAllocUsers(EHAllocCall, EHAllocCall); + } + // FIXME: Similarly, we could add the same attributes for CallSites in + // catch blocks by traversing blocks reachable from landingpads until EH + // return points. 
+} Index: test/Transforms/PruneEH/noinlineattr.ll =================================================================== --- /dev/null +++ test/Transforms/PruneEH/noinlineattr.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -prune-eh -S | FileCheck %s + +%class.MyException = type { i8 } +$MyException = comdat any +@_ZTIi = external global i8* + +; CHECK-LABEL: f_throw +; CHECK: call void @MyException(%class.MyException* %0) [[ATTR_NUM:#[0-9]+]] +; CHECK: attributes [[ATTR_NUM]] = { cold noinline } +define i32 @f_throw(i32 %idx, i32 %limit) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: + %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + %0 = bitcast i8* %exception to %class.MyException* + call void @MyException(%class.MyException* %0) + br label %invoke.cont + +invoke.cont: + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + unreachable + +if.end: + ret i32 %idx +} + +define linkonce_odr void @MyException(%class.MyException* %this) { +entry: + ret void +} + +declare i8* @__cxa_allocate_exception(i64) +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_throw(i8*, i8*, i8*) + +attributes #0 = { nounwind } +attributes #1 = { noreturn }