Index: include/llvm/Analysis/TargetLibraryInfo.def =================================================================== --- include/llvm/Analysis/TargetLibraryInfo.def +++ include/llvm/Analysis/TargetLibraryInfo.def @@ -87,6 +87,9 @@ /// float __cospif(float x); TLI_DEFINE_ENUM_INTERNAL(cospif) TLI_DEFINE_STRING_INTERNAL("__cospif") +/// void* __cxa_allocate_exception(size_t thrown_size) throw(); +TLI_DEFINE_ENUM_INTERNAL(cxa_allocate_exception) +TLI_DEFINE_STRING_INTERNAL("__cxa_allocate_exception") /// int __cxa_atexit(void (*f)(void *), void *p, void *d); TLI_DEFINE_ENUM_INTERNAL(cxa_atexit) TLI_DEFINE_STRING_INTERNAL("__cxa_atexit") @@ -100,6 +103,9 @@ /// void __cxa_guard_release(guard_t *guard); TLI_DEFINE_ENUM_INTERNAL(cxa_guard_release) TLI_DEFINE_STRING_INTERNAL("__cxa_guard_release") +/// void __cxa_throw(void *thrown_exception, std::type_info *tinfo, void (*dest)(void *)); +TLI_DEFINE_ENUM_INTERNAL(cxa_throw) +TLI_DEFINE_STRING_INTERNAL("__cxa_throw") /// int __isoc99_scanf (const char *format, ...) TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_scanf) TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf") Index: include/llvm/IR/CallSite.h =================================================================== --- include/llvm/IR/CallSite.h +++ include/llvm/IR/CallSite.h @@ -270,8 +270,8 @@ bool isNoInline() const { CALLSITE_DELEGATE_GETTER(isNoInline()); } - void setIsNoInline(bool Value = true) { - CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); + void setIsNoInline() { + CALLSITE_DELEGATE_SETTER(setIsNoInline()); } /// @brief Determine if the call does not access memory. 
Index: lib/Transforms/IPO/PruneEH.cpp =================================================================== --- lib/Transforms/IPO/PruneEH.cpp +++ lib/Transforms/IPO/PruneEH.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -40,15 +41,21 @@ namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(ID) { + TargetLibraryInfo *TLI; + bool MarkNoInlineInEHR; + + PruneEH() : CallGraphSCCPass(ID), MarkNoInlineInEHR (false) { initializePruneEHPass(*PassRegistry::getPassRegistry()); } // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); + void AddNoinlineForCSInEH(Function *F); + void AddNoInlineForEHAllocUsers(Value *EHAlloc, CallInst *EHAllocCall); }; } @@ -56,15 +63,23 @@ INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) Pass *llvm::createPruneEHPass() { return new PruneEH(); } +void PruneEH::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + bool PruneEH::runOnSCC(CallGraphSCC &SCC) { SmallPtrSet SCCNodes; CallGraph &CG = getAnalysis().getCallGraph(); + TLI = &getAnalysis().getTLI(); + bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. 
Used for quickly @@ -171,6 +186,14 @@ } } + // If the SCC might unwind, find CallSites in exception handling regions, + // and mark them with the NoInline attribute. Note that this needs to be performed only + // once per module. + if (!MarkNoInlineInEHR && SCCMightUnwind) + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) + AddNoinlineForCSInEH(F); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks @@ -272,3 +295,70 @@ BB->eraseFromParent(); } + +/// AddNoInlineForEHAllocUsers - Add the NoInline attribute if CallSites take the +/// memory allocated for the exception to be thrown as their first argument. +void PruneEH::AddNoInlineForEHAllocUsers(Value *EHAlloc, + CallInst *EHAllocCall) { + for (User *AllocU : EHAlloc->users()) { + if (BitCastInst *BCInst = dyn_cast(AllocU)) + AddNoInlineForEHAllocUsers(BCInst, EHAllocCall); + else { + CallSite CS(AllocU); + if (CS && !isa(AllocU) && !CS.arg_empty() && + CS.getArgument(0)->stripPointerCasts() == EHAllocCall) { + Function *EHFn = CS.getCalledFunction(); + if (EHFn && !EHFn->hasFnAttribute(Attribute::NoInline) && + !EHFn->hasFnAttribute(Attribute::AlwaysInline) && + !EHFn->hasFnAttribute(Attribute::InlineHint) && + !CS.hasFnAttr(Attribute::NoInline) && + !CS.hasFnAttr(Attribute::AlwaysInline)) + CS.setIsNoInline(); + } + } + } +} + +/// AddNoinlineForCSInEH - add the NoInline attribute for CallSites +/// invoked in exception handling context. +/// FIXME: It might be reasonable to avoid inlining CallSites invoked in +/// exception handling context so that we can reduce code size blow-up in +/// EH region as well as indirectly increase inline opportunities for unwinding +/// functions containing exception handling code. 
+void PruneEH::AddNoinlineForCSInEH(Function *F) { + MarkNoInlineInEHR = true; + Function *FnThrowE = + F->getParent()->getFunction(TLI->getName(LibFunc::cxa_throw)); + Function *FnEAlloc = F->getParent()->getFunction( + TLI->getName(LibFunc::cxa_allocate_exception)); + + if (!FnEAlloc || !FnThrowE || !FnEAlloc->isDeclaration() || + !FnThrowE->isDeclaration()) + return; + // Try to find functions which take memory allocated specifically for the + // exception to be thrown as their first argument, which must be a constructor + // or method executed in the context of exception handling. + // For example, in IR below, the NoInline attribute will be added in CallInst + // for MyException() as it takes the memory allocated for the exception + // thrown. + // + // %exception = call i8* @__cxa_allocate_exception(i64 1) + // %0 = bitcast i8* %exception to %class.MyException* + // call void @MyException(%class.MyException* %0) + // call void @__cxa_throw(i8* %exception, .. ) + for (User *FnU : FnThrowE->users()) { + CallInst *ThrowCall = dyn_cast(FnU); + if (!ThrowCall || ThrowCall->isNoBuiltin()) + continue; + // Make sure that the first argument of __cxa_throw() is the memory + // allocated by __cxa_allocate_exception(). + CallInst *EHAllocCall = dyn_cast(ThrowCall->getArgOperand(0)); + if (!EHAllocCall || EHAllocCall->isNoBuiltin() || + EHAllocCall->getCalledFunction() != FnEAlloc) + continue; + AddNoInlineForEHAllocUsers(EHAllocCall, EHAllocCall); + } + // FIXME: Similarly, we could add the NoInline attribute for CallSites in + // catch blocks by traversing blocks reachable from landingpads until EH + // return points. 
+} Index: test/Transforms/PruneEH/noinlineattr.ll =================================================================== --- /dev/null +++ test/Transforms/PruneEH/noinlineattr.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -prune-eh -S | FileCheck %s + +%class.MyException = type { i8 } +$_ZN11MyExceptionC2Ev = comdat any +@_ZTIi = external global i8* + +; CHECK-LABEL: _Z7fCalleeii +; CHECK: call void @_ZN11MyExceptionC2Ev(%class.MyException* %0) [[ATTR_NUM:#[0-9]+]] +; CHECK: attributes [[ATTR_NUM]] = { noinline } +define i32 @_Z7fCalleeii(i32 %idx, i32 %limit) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + %0 = bitcast i8* %exception to %class.MyException* + call void @_ZN11MyExceptionC2Ev(%class.MyException* %0) + br label %invoke.cont + +invoke.cont: ; preds = %if.then + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + unreachable + +if.end: ; preds = %entry + ret i32 %idx +} + +; Function Attrs: nounwind +define linkonce_odr void @_ZN11MyExceptionC2Ev(%class.MyException* %this) unnamed_addr comdat align 2 { +entry: + ret void +} + +declare i8* @__cxa_allocate_exception(i64) +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_throw(i8*, i8*, i8*) + +attributes #0 = { nounwind } +attributes #1 = { noreturn }