Index: include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- include/llvm/Transforms/Utils/CodeExtractor.h +++ include/llvm/Transforms/Utils/CodeExtractor.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include namespace llvm { @@ -146,7 +147,8 @@ BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock); private: - void severSplitPHINodes(BasicBlock *&Header); + void severSplitPHINodesOfEntry(BasicBlock *&Header); + void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); void splitReturnBlocks(); Function *constructFunction(const ValueSet &inputs, Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -531,10 +531,10 @@ } } -/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the -/// region, we need to split the entry block of the region so that the PHI node -/// is easier to deal with. -void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { +/// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside +/// of the region, we need to split the entry block of the region so that the +/// PHI node is easier to deal with. +void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) { unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; @@ -606,6 +606,55 @@ } } +/// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from +/// outlined region, we split these PHIs on two: one with inputs from region +/// and other with remaining incoming blocks; then first PHIs are placed in +/// outlined region. +void CodeExtractor::severSplitPHINodesOfExits( + const SmallPtrSetImpl &Exits) { + for (BasicBlock *ExitBB : Exits) { + BasicBlock *NewBB = nullptr; + + for (PHINode &PN : ExitBB->phis()) { + // find all incoming blocks from region + SmallVector IncomingVals; + for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) + if (Blocks.count(PN.getIncomingBlock(i))) + IncomingVals.push_back(i); + + // do not process PHI if there is one (or fewer) predecessor from region + if (IncomingVals.size() <= 1) + continue; + + // create block for new PHIs and add it to the list of outlined if it + // wasn't done before + if (!NewBB) { + NewBB = BasicBlock::Create(ExitBB->getContext(), + ExitBB->getName() + ".split", + ExitBB->getParent(), ExitBB); + for (pred_iterator I = pred_begin(ExitBB), E = pred_end(ExitBB); + I != E;) { + BasicBlock *PredBB = *I++; + if (Blocks.count(PredBB)) + PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB); + } + BranchInst::Create(ExitBB, NewBB); + Blocks.insert(NewBB); + } + + // split this PHI + PHINode *NewPN = + PHINode::Create(PN.getType(), IncomingVals.size(), + PN.getName() + ".ce", NewBB->getFirstNonPHI()); + for (unsigned i : IncomingVals) + NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i)); + for (unsigned i : reverse(IncomingVals)) + PN.removeIncomingValue(i, false); + PN.addIncoming(NewPN, NewBB); + } + } +} + void CodeExtractor::splitReturnBlocks() { for (BasicBlock *Block : Blocks) if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { @@ -1173,13 +1222,33 @@ } } - // If we have to split PHI nodes or the entry block, do so now. - severSplitPHINodes(header); - // If we have any return instructions in the region, split those blocks so // that the return is not in the region. splitReturnBlocks(); + // Calculate the exit blocks for the extracted region and the total exit + // weights for each of those blocks. + DenseMap ExitWeights; + SmallPtrSet ExitBlocks; + for (BasicBlock *Block : Blocks) { + for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; + ++SI) { + if (!Blocks.count(*SI)) { + // Update the branch weight for this successor. + if (BFI) { + BlockFrequency &BF = ExitWeights[*SI]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI); + } + ExitBlocks.insert(*SI); + } + } + } + NumExitBlocks = ExitBlocks.size(); + + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(header); + severSplitPHINodesOfExits(ExitBlocks); + // This takes place of the original loop BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), "codeRepl", oldFunction, @@ -1224,25 +1293,6 @@ cast(II)->moveBefore(TI); } - // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. - DenseMap ExitWeights; - SmallPtrSet ExitBlocks; - for (BasicBlock *Block : Blocks) { - for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; - ++SI) { - if (!Blocks.count(*SI)) { - // Update the branch weight for this successor. - if (BFI) { - BlockFrequency &BF = ExitWeights[*SI]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI); - } - ExitBlocks.insert(*SI); - } - } - } - NumExitBlocks = ExitBlocks.size(); - // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunction(inputs, outputs, header, newFuncRoot, @@ -1270,8 +1320,8 @@ if (BFI && NumExitBlocks > 1) calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); - // Loop over all of the PHI nodes in the header block, and change any - // references to the old incoming edge to be the new incoming edge. + // Loop over all of the PHI nodes in the header and exit blocks, and change + // any references to the old incoming edge to be the new incoming edge. for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { PHINode *PN = cast(I); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) @@ -1279,35 +1329,11 @@ PN->setIncomingBlock(i, newFuncRoot); } - // Look at all successors of the codeReplacer block. If any of these blocks - // had PHI nodes in them, we need to update the "from" block to be the code - // replacer, not the original block in the extracted region. - for (BasicBlock *SuccBB : successors(codeReplacer)) { - for (PHINode &PN : SuccBB->phis()) { - Value *IncomingCodeReplacerVal = nullptr; - SmallVector IncomingValsToRemove; - for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) { - BasicBlock *IncomingBB = PN.getIncomingBlock(I); - - // Ignore incoming values from outside of the extracted region. - if (!Blocks.count(IncomingBB)) - continue; - - // Ensure that there is only one incoming value from codeReplacer. - if (!IncomingCodeReplacerVal) { - PN.setIncomingBlock(I, codeReplacer); - IncomingCodeReplacerVal = PN.getIncomingValue(I); - } else { - assert(IncomingCodeReplacerVal == PN.getIncomingValue(I) && - "PHI has two incompatbile incoming values from codeRepl"); - IncomingValsToRemove.push_back(I); - } - } - - for (unsigned I : reverse(IncomingValsToRemove)) - PN.removeIncomingValue(I, /*DeletePHIIfEmpty=*/false); - } - } + for (BasicBlock *ExitBB : ExitBlocks) + for (PHINode &PN : ExitBB->phis()) + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + if (Blocks.count(PN.getIncomingBlock(i))) + PN.setIncomingBlock(i, codeReplacer); // Erase debug info intrinsics. Variable updates within the new function are // invisible to debuggers. This could be improved by defining a DISubprogram @@ -1338,6 +1364,8 @@ newFunction->setDoesNotReturn(); LLVM_DEBUG(if (verifyFunction(*newFunction)) - report_fatal_error("verifyFunction failed!")); + report_fatal_error("verification of newFunction failed!")); + LLVM_DEBUG(if (verifyFunction(*oldFunction)) + report_fatal_error("verification of oldFunction failed!")); return newFunction; } Index: test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll =================================================================== --- test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll +++ test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll @@ -15,9 +15,9 @@ ; CHECK: call {{.*}}@sideeffect( ; CHECK: call {{.*}}@realloc( ; CHECK-LABEL: codeRepl: -; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr) +; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr, i8** %retval.0.ce.loc) ; CHECK-LABEL: cleanup: -; CHECK-NEXT: phi i8* [ null, %if.then ], [ null, %codeRepl ], [ %call, %if.end ] +; CHECK-NEXT: phi i8* [ null, %if.then ], [ %call, %if.end ], [ %retval.0.ce.reload, %codeRepl ] define i8* @realloc2(i8* %ptr, i64 %size) { entry: %0 = add i64 %size, -1 Index: unittests/Transforms/Utils/CodeExtractorTest.cpp =================================================================== --- unittests/Transforms/Utils/CodeExtractorTest.cpp +++ unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -21,7 +21,7 @@ using namespace llvm; namespace { -TEST(CodeExtractor, DISABLED_ExitStub) { +TEST(CodeExtractor, ExitStub) { LLVMContext Ctx; SMDiagnostic Err; std::unique_ptr M(parseAssemblyString(R"invalid( @@ -46,25 +46,6 @@ )invalid", Err, Ctx)); - // CodeExtractor miscompiles this function. There appear to be some issues - // with the handling of outlined regions with live output values. - // - // In the original function, CE adds two reloads in the codeReplacer block: - // - // codeRepl: ; preds = %header - // call void @foo_header.split(i32 %z, i32 %x, i32 %y, i32* %.loc, i32* %.loc1) - // %.reload = load i32, i32* %.loc - // %.reload2 = load i32, i32* %.loc1 - // br label %notExtracted - // - // These reloads must flow into the notExtracted block: - // - // notExtracted: ; preds = %codeRepl - // %0 = phi i32 [ %.reload, %codeRepl ], [ %.reload2, %body2 ] - // - // The problem is that the PHI node in notExtracted now has an incoming - // value from a BasicBlock that's in a different function. - Function *Func = M->getFunction("foo"); SmallVector Candidates; for (auto &BB : *Func) { @@ -84,5 +65,6 @@ Function *Outlined = CE.extractCodeRegion(); EXPECT_TRUE(Outlined); EXPECT_FALSE(verifyFunction(*Outlined)); + EXPECT_FALSE(verifyFunction(*Func)); } } // end anonymous namespace