Index: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp +++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp @@ -202,10 +202,8 @@ std::function *GetAC, std::function *GTTI, Optional> GBFI, - ProfileSummaryInfo *ProfSI, - std::function *GORE) - : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI), - GetORE(GORE) {} + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); // Main part of the transformation that calls helper functions to find @@ -271,7 +269,6 @@ std::function *GetTTI; Optional> GetBFI; ProfileSummaryInfo *PSI; - std::function *GetORE; // Return the frequency of the OutlininingBB relative to F's entry point. // The result is no larger than 1 and is represented using BP. @@ -282,7 +279,8 @@ // Return true if the callee of CS should be partially inlined with // profit. bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost); + BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE); // Try to inline DuplicateFunction (cloned from F with call to // the OutlinedFunction into its callers. 
Return true @@ -337,7 +335,7 @@ std::unique_ptr computeOutliningInfo(Function *F); std::unique_ptr - computeOutliningColdRegionsInfo(Function *F); + computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE); }; struct PartialInlinerLegacyPass : public ModulePass { @@ -362,7 +360,6 @@ &getAnalysis(); ProfileSummaryInfo *PSI = getAnalysis().getPSI(); - std::unique_ptr<OptimizationRemarkEmitter> UPORE; std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { @@ -374,14 +371,7 @@ return TTIWP->getTTI(F); }; - std::function<OptimizationRemarkEmitter &(Function &)> GetORE = - [&UPORE](Function &F) -> OptimizationRemarkEmitter & { - UPORE.reset(new OptimizationRemarkEmitter(&F)); - return *UPORE.get(); - }; - - return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI, - &GetORE) + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI) .run(M); } }; @@ -389,7 +379,8 @@ } // end anonymous namespace std::unique_ptr -PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { +PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F, + OptimizationRemarkEmitter &ORE) { BasicBlock *EntryBlock = &F->front(); DominatorTree DT(*F); @@ -403,8 +394,6 @@ } else BFI = &(*GetBFI)(*F); - auto &ORE = (*GetORE)(*F); - // Return if we don't have profiling information. 
if (!PSI->hasInstrumentationProfile()) return std::unique_ptr(); @@ -766,7 +755,8 @@ bool PartialInlinerImpl::shouldPartialInline( CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost) { + BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE) { using namespace ore; Instruction *Call = CS.getInstruction(); @@ -778,7 +768,6 @@ Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); - auto &ORE = (*GetORE)(*Caller); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, *GetAssumptionCache, GetBFI, PSI, &ORE); @@ -1270,14 +1259,14 @@ if (F->user_begin() == F->user_end()) return {false, nullptr}; - auto &ORE = (*GetORE)(*F); + OptimizationRemarkEmitter ORE(F); // Only try to outline cold regions if we have a profile summary, which // implies we have profiling information. if (PSI->hasProfileSummary() && F->hasProfileData() && !DisableMultiRegionPartialInline) { std::unique_ptr OMRI = - computeOutliningColdRegionsInfo(F); + computeOutliningColdRegionsInfo(F, ORE); if (OMRI) { FunctionCloner Cloner(F, OMRI.get(), ORE); @@ -1357,11 +1346,11 @@ // inlining the function with outlining (The inliner uses the size increase to // model the cost of inlining a callee). 
if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { - auto &ORE = (*GetORE)(*Cloner.OrigFunc); + OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); DebugLoc DLoc; BasicBlock *Block; std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); - ORE.emit([&]() { + OrigFuncORE.emit([&]() { return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", DLoc, Block) << ore::NV("Function", Cloner.OrigFunc) @@ -1394,11 +1383,10 @@ if (IsLimitReached()) continue; - - if (!shouldPartialInline(CS, Cloner, WeightedRcost)) + OptimizationRemarkEmitter CallerORE(CS.getCaller()); + if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE)) continue; - auto &ORE = (*GetORE)(*CS.getCaller()); // Construct remark before doing the inlining, as after successful inlining // the callsite is removed. OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); @@ -1413,7 +1401,7 @@ : nullptr))) continue; - ORE.emit(OR); + CallerORE.emit(OR); // Now update the entry count: if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { @@ -1436,8 +1424,8 @@ if (CalleeEntryCount) Cloner.OrigFunc->setEntryCount( CalleeEntryCount.setCount(CalleeEntryCountV)); - auto &ORE = (*GetORE)(*Cloner.OrigFunc); - ORE.emit([&]() { + OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); + OrigFuncORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) << "Partially inlined into at least one caller"; }); @@ -1519,14 +1507,9 @@ return FAM.getResult(F); }; - std::function GetORE = - [&FAM](Function &F) -> OptimizationRemarkEmitter & { - return FAM.getResult(F); - }; - ProfileSummaryInfo *PSI = &AM.getResult(M); - if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE) + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI) .run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); Index: llvm/trunk/test/Transforms/CodeExtractor/PartialInlineORECrash.ll 
=================================================================== --- llvm/trunk/test/Transforms/CodeExtractor/PartialInlineORECrash.ll +++ llvm/trunk/test/Transforms/CodeExtractor/PartialInlineORECrash.ll @@ -0,0 +1,170 @@ +; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -inline-threshold=0 -disable-output + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%0 = type { i32 (...)**, %1, %1, %3, %3, %3, i8, float, %4*, %5*, %5*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] } +%1 = type { %2, %3 } +%2 = type { [3 x %3] } +%3 = type { [4 x float] } +%4 = type <{ i8*, i16, i16, [4 x i8], i8*, i32, %3, %3, [4 x i8] }> +%5 = type { i32 (...)**, i32, i8* } +%6 = type <{ %7, [4 x i8], %19*, %20*, %30, %35, %3, float, i8, i8, i8, i8, %37, i32, [4 x i8] }> +%7 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17 }> +%8 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8 }> +%9 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %0**, i8, [7 x i8] }> +%11 = type { i32 (...)** } +%12 = type { float, i32, i32, float, i8, %15*, i8, i8, i8, float, i8, float, %13* } +%13 = type opaque +%14 = type { i32 (...)** } +%15 = type { i32 (...)** } +%16 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17, [4 x i8] }> +%17 = type { %18 } +%18 = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 } +%19 = type { i32 (...)** } +%20 = type <{ i32 (...)**, %21, %25, %9, i8, [7 x i8] }> +%21 = type { %22 } +%22 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %24*, i8, [7 x i8] }> +%24 = type { i32, i32 } +%25 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %27**, i8, [7 x i8] }> +%27 = type { i32, [4 x i8], [4 x %29], i8*, i8*, i32, float, float, i32 } +%29 = type <{ %3, %3, %3, %3, %3, float, float, float, i32, i32, i32, i32, [4 x i8], i8*, float, i8, [3 x i8], float, float, i32, %3, %3, 
[4 x i8] }> +%30 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %32**, i8, [7 x i8] }> +%32 = type { i32 (...)**, i32, i32, i32, i8, %33*, %33*, float, float, %3, %3, %3 } +%33 = type <{ %0, %2, %3, %3, float, %3, %3, %3, %3, %3, %3, %3, float, float, i8, [3 x i8], float, float, float, float, float, float, %34*, %30, i32, i32, i32, [4 x i8] }> +%34 = type { i32 (...)** } +%35 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %33**, i8, [7 x i8] }> +%37 = type <{ i8, [3 x i8], i32, i32, [4 x i8], %39**, i8, [7 x i8] }> +%39 = type { i32 (...)** } +%40 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8, [7 x i8] }> + +@gDisableDeactivation = external local_unnamed_addr global i8, align 1 +@0 = external dso_local unnamed_addr constant [29 x i8], align 1 +@1 = external dso_local unnamed_addr constant [14 x i8], align 1 +@2 = external dso_local unnamed_addr constant [22 x i8], align 1 +@gDeactivationTime = external local_unnamed_addr global float, align 4 + +declare void @_ZN15CProfileManager12Stop_ProfileEv() local_unnamed_addr + +declare void @_ZN15CProfileManager13Start_ProfileEPKc(i8*) local_unnamed_addr + +declare void @_ZN17btCollisionObject18setActivationStateEi(%0*, i32 signext) local_unnamed_addr + +declare hidden void @__clang_call_terminate(i8*) local_unnamed_addr + +declare i32 @__gxx_personality_v0(...) + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0 + +define void @_ZN23btDiscreteDynamicsWorld28internalSingleStepSimulationEf(%6*, float) unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !27 { + invoke void null(%6* nonnull %0, float %1) + to label %5 unwind label %3 + +;