Index: llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h +++ llvm/trunk/include/llvm/Transforms/Utils/CodeExtractor.h @@ -72,11 +72,13 @@ /// sequence out into its new function. When a DominatorTree is also given, /// extra checking and transformations are enabled. If AllowVarArgs is true, /// vararg functions can be extracted. This is safe, if all vararg handling - /// code is extracted, including vastart. + /// code is extracted, including vastart. If AllowAlloca is true, then + /// extraction of blocks containing alloca instructions would be possible, + /// however code extractor won't validate whether extraction is legal. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, - bool AllowVarArgs = false); + bool AllowVarArgs = false, bool AllowAlloca = false); /// Create a code extractor for a loop body. /// @@ -86,14 +88,6 @@ BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr); - /// Check to see if a block is valid for extraction. - /// - /// Blocks containing EHPads, allocas and invokes are not valid. If - /// AllowVarArgs is true, blocks with vastart can be extracted. This is - /// safe, if all vararg handling code is extracted, including vastart. - static bool isBlockValidForExtraction(const BasicBlock &BB, - bool AllowVarArgs); - /// Perform the extraction, returning the new function. /// /// Returns zero when called on a CodeExtractor instance where isEligible Index: llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp @@ -79,11 +79,9 @@ cl::desc("Aggregate arguments to code-extracted functions")); /// Test whether a block is valid for extraction. -bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, - bool AllowVarArgs) { - // Landing pads must be in the function where they were inserted for cleanup. - if (BB.isEHPad()) - return false; +static bool isBlockValidForExtraction(const BasicBlock &BB, + const SetVector &Result, + bool AllowVarArgs, bool AllowAlloca) { // taking the address of a basic block moved to another function is illegal if (BB.hasAddressTaken()) return false; @@ -112,11 +110,63 @@ } } - // Don't hoist code containing allocas or invokes. If explicitly requested, - // allow vastart. + // If explicitly requested, allow vastart and alloca. For invoke instructions + // verify that extraction is valid. for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { - if (isa(I) || isa(I)) - return false; + if (isa(I)) { + if (!AllowAlloca) + return false; + continue; + } + + if (const auto *II = dyn_cast(I)) { + // Unwind destination (either a landingpad, catchswitch, or cleanuppad) + // must be a part of the subgraph which is being extracted. + if (auto *UBB = II->getUnwindDest()) + if (!Result.count(UBB)) + return false; + continue; + } + + // All catch handlers of a catchswitch instruction as well as the unwind + // destination must be in the subgraph. + if (const auto *CSI = dyn_cast(I)) { + if (auto *UBB = CSI->getUnwindDest()) + if (!Result.count(UBB)) + return false; + for (auto *HBB : CSI->handlers()) + if (!Result.count(const_cast(HBB))) + return false; + continue; + } + + // Make sure that entire catch handler is within subgraph. It is sufficient + // to check that catch return's block is in the list. + if (const auto *CPI = dyn_cast(I)) { + for (const auto *U : CPI->users()) + if (const auto *CRI = dyn_cast(U)) + if (!Result.count(const_cast(CRI->getParent()))) + return false; + continue; + } + + // And do similar checks for cleanup handler - the entire handler must be + // in subgraph which is going to be extracted. For cleanup return should + // additionally check that the unwind destination is also in the subgraph. + if (const auto *CPI = dyn_cast(I)) { + for (const auto *U : CPI->users()) + if (const auto *CRI = dyn_cast(U)) + if (!Result.count(const_cast(CRI->getParent()))) + return false; + continue; + } + if (const auto *CRI = dyn_cast(I)) { + if (auto *UBB = CRI->getUnwindDest()) + if (!Result.count(UBB)) + return false; + continue; + } + if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::vastart) { @@ -133,7 +183,7 @@ /// Build a set of blocks to extract if the input blocks are viable. static SetVector buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT, - bool AllowVarArgs) { + bool AllowVarArgs, bool AllowAlloca) { assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; @@ -146,32 +196,41 @@ if (!Result.insert(BB)) llvm_unreachable("Repeated basic blocks in extraction input"); - if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) { - Result.clear(); - return Result; - } } -#ifndef NDEBUG - for (SetVector::iterator I = std::next(Result.begin()), - E = Result.end(); - I != E; ++I) - for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); - PI != PE; ++PI) - assert(Result.count(*PI) && - "No blocks in this region may have entries from outside the region" - " except for the first block!"); -#endif + for (auto *BB : Result) { + if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) + return {}; + + // Make sure that the first block is not a landing pad. + if (BB == Result.front()) { + if (BB->isEHPad()) { + DEBUG(dbgs() << "The first block cannot be an unwind block\n"); + return {}; + } + continue; + } + + // All blocks other than the first must not have predecessors outside of + // the subgraph which is being extracted. + for (auto *PBB : predecessors(BB)) + if (!Result.count(PBB)) { + DEBUG(dbgs() << "No blocks in this region may have entries from " + "outside the region except for the first block!\n"); + return {}; + } + } return Result; } CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI, bool AllowVarArgs) + BranchProbabilityInfo *BPI, bool AllowVarArgs, + bool AllowAlloca) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AllowVarArgs(AllowVarArgs), - Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {} + Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, @@ -179,7 +238,8 @@ : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, - /* AllowVarArgs */ false)) {} + /* AllowVarArgs */ false, + /* AllowAlloca */ false)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. @@ -1178,6 +1238,10 @@ moveCodeToFunction(newFunction); + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + // Update the branch weights for the exit block. if (BFI && NumExitBlocks > 1) calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); Index: llvm/trunk/test/Transforms/CodeExtractor/inline_eh.ll =================================================================== --- llvm/trunk/test/Transforms/CodeExtractor/inline_eh.ll +++ llvm/trunk/test/Transforms/CodeExtractor/inline_eh.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s +; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s + +declare void @bar() +declare i32 @__gxx_personality_v0(...) +declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() + +define internal void @callee(i1 %cond) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + br i1 %cond, label %if.then, label %if.end + +if.then: + invoke void @bar() + to label %invoke.cont unwind label %lpad + +invoke.cont: + br label %try.cont + +lpad: + %0 = landingpad { i8*, i32 } + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = extractvalue { i8*, i32 } %0, 1 + br label %catch + +catch: + %3 = call i8* @__cxa_begin_catch(i8* %1) + call void @__cxa_end_catch() + br label %try.cont + +try.cont: + br label %if.end + +if.end: + ret void +} + +define internal void @caller(i1 %cond) { +; CHECK-LABEL: define {{.*}} @caller +entry: +; CHECK: entry: +; CHECK-NEXT: br i1 +; CHECK: codeRepl.i: +; CHECK-NEXT: call void @callee.1_{{.*}}() + call void @callee(i1 %cond) + ret void +} + +; CHECK-LABEL: define {{.*}} @callee.1_{{.*}}() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) +; CHECK: invoke void @bar() +; CHECK: landingpad Index: llvm/trunk/test/Transforms/CodeExtractor/inline_eh_1.ll =================================================================== --- llvm/trunk/test/Transforms/CodeExtractor/inline_eh_1.ll +++ llvm/trunk/test/Transforms/CodeExtractor/inline_eh_1.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -skip-partial-inlining-cost-analysis -partial-inliner -S | FileCheck %s +; RUN: opt < %s -skip-partial-inlining-cost-analysis -passes=partial-inliner -S | FileCheck %s + +declare dso_local void @bar() +declare dso_local i32 @__CxxFrameHandler3(...) + +define internal void @callee(i1 %cond) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + br i1 %cond, label %if.then, label %if.end + +if.then: + invoke void @bar() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: + br label %try.cont + +ehcleanup: + %0 = cleanuppad within none [] + cleanupret from %0 unwind label %catch.dispatch + +catch.dispatch: + %1 = catchswitch within none [label %catch] unwind to caller + +catch: + %2 = catchpad within %1 [i8* null, i32 64, i8* null] + catchret from %2 to label %catchret.dest + +catchret.dest: + br label %try.cont + +try.cont: + br label %if.end + +if.end: + ret void +} + +define internal void @caller(i1 %cond) { +; CHECK-LABEL: define {{.*}} @caller +entry: +; CHECK: entry: +; CHECK-NEXT: br i1 +; CHECK: codeRepl.i: +; CHECK-NEXT: call void @callee.1_{{.*}}() + call void @callee(i1 %cond) + ret void +} + +; CHECK-LABEL: define {{.*}} @callee.1_{{.*}}() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) +; CHECK: invoke void @bar() +; CHECK: cleanuppad +; CHECK-NEXT: cleanupret +; CHECK: catchswitch +; CHECK: catchpad +; CHECK-NEXT: catchret