Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -224,10 +224,8 @@ struct FunctionCloner { // Two constructors, one for single region outlining, the other for // multi-region outlining. - FunctionCloner(Function *F, FunctionOutliningInfo *OI, - OptimizationRemarkEmitter &ORE); - FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI, - OptimizationRemarkEmitter &ORE); + FunctionCloner(Function *F, FunctionOutliningInfo *OI); + FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI); ~FunctionCloner(); // Prepare for function outlining: making sure there is only @@ -236,14 +234,14 @@ void NormalizeReturnBlock(); // Do function outlining for cold regions. - bool doMultiRegionFunctionOutlining(); + bool doMultiRegionFunctionOutlining(OptimizationRemarkEmitter &ORE); // Do function outlining for region after early return block(s). // NOTE: For vararg functions that do the vararg handling in the outlined // function, we temporarily generate IR that does not properly // forward varargs to the outlined function. Calling InlineFunction // will update calls to the outlined functions to properly forward // the varargs. - Function *doSingleRegionFunctionOutlining(); + Function *doSingleRegionFunctionOutlining(OptimizationRemarkEmitter &ORE); Function *OrigFunc = nullptr; Function *ClonedFunc = nullptr; @@ -262,7 +260,6 @@ // ClonedOMRI is specific to outlining cold regions. std::unique_ptr ClonedOMRI = nullptr; std::unique_ptr ClonedFuncBFI = nullptr; - OptimizationRemarkEmitter &ORE; }; private: @@ -337,7 +334,7 @@ std::unique_ptr computeOutliningInfo(Function *F); std::unique_ptr - computeOutliningColdRegionsInfo(Function *F); + computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE); }; struct PartialInlinerLegacyPass : public ModulePass { @@ -389,7 +386,8 @@ } // end anonymous namespace std::unique_ptr -PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { +PartialInlinerImpl::computeOutliningColdRegionsInfo( + Function *F, OptimizationRemarkEmitter &ORE) { BasicBlock *EntryBlock = &F->front(); DominatorTree DT(*F); @@ -403,8 +401,6 @@ } else BFI = &(*GetBFI)(*F); - auto &ORE = (*GetORE)(*F); - // Return if we don't have profiling information. if (!PSI->hasInstrumentationProfile()) return std::unique_ptr(); @@ -970,8 +966,8 @@ } PartialInlinerImpl::FunctionCloner::FunctionCloner( - Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE) - : OrigFunc(F), ORE(ORE) { + Function *F, FunctionOutliningInfo *OI) + : OrigFunc(F) { ClonedOI = llvm::make_unique(); // Clone the function, so that we can hack away on it. @@ -993,9 +989,8 @@ } PartialInlinerImpl::FunctionCloner::FunctionCloner( - Function *F, FunctionOutliningMultiRegionInfo *OI, - OptimizationRemarkEmitter &ORE) - : OrigFunc(F), ORE(ORE) { + Function *F, FunctionOutliningMultiRegionInfo *OI) + : OrigFunc(F) { ClonedOMRI = llvm::make_unique(); // Clone the function, so that we can hack away on it. @@ -1104,7 +1099,8 @@ } } -bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { +bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining( + OptimizationRemarkEmitter &ORE) { auto ComputeRegionCost = [](SmallVectorImpl &Region) { int Cost = 0; @@ -1177,8 +1173,8 @@ return !OutlinedFunctions.empty(); } -Function * -PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { +Function *PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining( + OptimizationRemarkEmitter &ORE) { // Returns true if the block is to be partial inlined into the caller // (i.e. not to be extracted to the out of line function) auto ToBeInlined = [&, this](BasicBlock *BB) { @@ -1270,16 +1266,16 @@ if (F->user_begin() == F->user_end()) return {false, nullptr}; - auto &ORE = (*GetORE)(*F); // Only try to outline cold regions if we have a profile summary, which // implies we have profiling information. if (PSI->hasProfileSummary() && F->hasProfileData() && !DisableMultiRegionPartialInline) { + auto &ORE = (*GetORE)(*F); std::unique_ptr OMRI = - computeOutliningColdRegionsInfo(F); + computeOutliningColdRegionsInfo(F, ORE); if (OMRI) { - FunctionCloner Cloner(F, OMRI.get(), ORE); + FunctionCloner Cloner(F, OMRI.get()); #ifndef NDEBUG if (TracePartialInlining) { @@ -1288,7 +1284,7 @@ << "\n"; } #endif - bool DidOutline = Cloner.doMultiRegionFunctionOutlining(); + bool DidOutline = Cloner.doMultiRegionFunctionOutlining(ORE); if (DidOutline) { #ifndef NDEBUG @@ -1312,10 +1308,11 @@ if (!OI) return {false, nullptr}; - FunctionCloner Cloner(F, OI.get(), ORE); + auto &ORE = (*GetORE)(*F); + FunctionCloner Cloner(F, OI.get()); Cloner.NormalizeReturnBlock(); - Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining(); + Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining(ORE); if (!OutlinedFunction) return {false, nullptr}; Index: test/Transforms/CodeExtractor/PartialInlineORECrash.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/PartialInlineORECrash.ll @@ -0,0 +1,498 @@ +; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -disable-output +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +%0 = type { i32 (...)**, %1, %1, %3, %3, %3, i8, float, %4*, %5*, %5*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] } +%1 = type { %2, %3 } +%2 = type { [3 x %3] } +%3 = type { [4 x float] } +%4 = type <{ i8*, i16, i16, [4 x i8], i8*, i32, %3, %3, [4 x i8] }> +%5 = type { i32 (...)**, i32, i8* } +%6 = type <{ %7, [4 x i8], %19*, %20*, %30, %35, %3, float, i8, i8, i8, i8, %37, i32, [4 x i8] }> +%7 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17 }> +%8 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8 }> +%9 = type <{ %10, [3 x i8], i32, i32, [4 x i8], %0**, i8, [7 x i8] }> +%10 = type { i8 } +%11 = type { i32 (...)** } +%12 = type { float, i32, i32, float, i8, %15*, i8, i8, i8, float, i8, float, %13* } +%13 = type opaque +%14 = type { i32 (...)** } +%15 = type { i32 (...)** } +%16 = type <{ %8, [7 x i8], void (%16*, float)*, void (%16*, float)*, i8*, %17, [4 x i8] }> +%17 = type { %18 } +%18 = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 } +%19 = type { i32 (...)** } +%20 = type <{ i32 (...)**, %21, %25, %9, i8, [7 x i8] }> +%21 = type { %22 } +%22 = type <{ %23, [3 x i8], i32, i32, [4 x i8], %24*, i8, [7 x i8] }> +%23 = type { i8 } +%24 = type { i32, i32 } +%25 = type <{ %26, [3 x i8], i32, i32, [4 x i8], %27**, i8, [7 x i8] }> +%26 = type { i8 } +%27 = type { %28, [4 x i8], [4 x %29], i8*, i8*, i32, float, float, i32 } +%28 = type { i32 } +%29 = type <{ %3, %3, %3, %3, %3, float, float, float, i32, i32, i32, i32, [4 x i8], i8*, float, i8, [3 x i8], float, float, i32, %3, %3, [4 x i8] }> +%30 = type <{ %31, [3 x i8], i32, i32, [4 x i8], %32**, i8, [7 x i8] }> +%31 = type { i8 } +%32 = type { i32 (...)**, %28, i32, i32, i8, %33*, %33*, float, float, %3, %3, %3 } +%33 = type <{ %0, %2, %3, %3, float, %3, %3, %3, %3, %3, %3, %3, float, float, i8, [3 x i8], float, float, float, float, float, float, %34*, %30, i32, i32, i32, [4 x i8] }> +%34 = type { i32 (...)** } +%35 = type <{ %36, [3 x i8], i32, i32, [4 x i8], %33**, i8, [7 x i8] }> +%36 = type { i8 } +%37 = type <{ %38, [3 x i8], i32, i32, [4 x i8], %39**, i8, [7 x i8] }> +%38 = type { i8 } +%39 = type { i32 (...)** } +%40 = type <{ i32 (...)**, %9, %11*, %12, %13*, %14*, %15*, i8, [7 x i8] }> + +@gDisableDeactivation = external local_unnamed_addr global i8, align 1 +@0 = private unnamed_addr constant [29 x i8] c"internalSingleStepSimulation\00", align 1 +@1 = private unnamed_addr constant [14 x i8] c"updateActions\00", align 1 +@2 = private unnamed_addr constant [22 x i8] c"updateActivationState\00", align 1 +@gDeactivationTime = external local_unnamed_addr global float, align 4 + +declare void @_ZN15CProfileManager12Stop_ProfileEv() local_unnamed_addr + +declare void @_ZN15CProfileManager13Start_ProfileEPKc(i8*) local_unnamed_addr + +declare void @_ZN17btCollisionObject18setActivationStateEi(%0*, i32 signext) local_unnamed_addr + +declare hidden void @__clang_call_terminate(i8*) local_unnamed_addr + +declare i32 @__gxx_personality_v0(...) + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0 + +define void @_ZN23btDiscreteDynamicsWorld28internalSingleStepSimulationEf(%6*, float) unnamed_addr align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !30 { + tail call void @_ZN15CProfileManager13Start_ProfileEPKc(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @0, i64 0, i64 0)) + %3 = bitcast %6* %0 to %16* + %4 = getelementptr inbounds %6, %6* %0, i64 0, i32 0, i32 3 + %5 = load void (%16*, float)*, void (%16*, float)** %4, align 8, !tbaa !31 + %6 = icmp eq void (%16*, float)* %5, null + br i1 %6, label %12, label %7, !prof !37 + +;