Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -283,11 +283,13 @@ BasicBlock::iterator It = StartInst->getIterator(); EndBB = BackInst->getParent(); BasicBlock *IBlock; + BasicBlock *PHIPredBlock = nullptr; bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst; while (PHINode *PN = dyn_cast(&*It)) { unsigned NumPredsOutsideRegion = 0; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (!BBSet.contains(PN->getIncomingBlock(i))) { + PHIPredBlock = PN->getIncomingBlock(i); ++NumPredsOutsideRegion; continue; } @@ -296,8 +298,10 @@ // the same as the final block of the OutlinableRegion. If this is the // case, the branch from this block must also be outlined to be valid. IBlock = PN->getIncomingBlock(i); - if (IBlock == EndBB && EndBBTermAndBackInstDifferent) + if (IBlock == EndBB && EndBBTermAndBackInstDifferent) { + PHIPredBlock = PN->getIncomingBlock(i); ++NumPredsOutsideRegion; + } } if (NumPredsOutsideRegion > 1) @@ -336,6 +340,10 @@ StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); + // If there was a PHINode with an incoming block outside the region, + // make sure is correctly updated in the newly split block. + if (PHIPredBlock) + PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB); CandidateSplit = true; if (!BackInst->isTerminator()) { @@ -379,6 +387,23 @@ assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); + // Make sure PHINode references to the block we are merging into are + // updated to be incoming blocks from the predecessor to the current block. + + // NOTE: If this is updated such that the outlined block can have more than + // one incoming block to a PHINode, this logic will have to updated + // to handle multiple precessors instead. + + // We only need to update this if the outlined section contains a PHINode, if + // it does not, then the incoming block was never changed in the first place. + // On the other hand, if PrevBB has no predecessors, it means that all + // incoming blocks to the first block are contained in the region, and there + // will be nothing to update. + Instruction *StartInst = (*Candidate->begin()).Inst; + if (isa(StartInst) && !PrevBB->hasNPredecessors(0)) { + BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor(); + PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB); + } PrevBB->getTerminator()->eraseFromParent(); // If we reattaching after outlining, we iterate over the phi nodes to @@ -1183,7 +1208,17 @@ assert(Cand.getStartBB() == IncomingBlock && "Unknown basic block used in exit path PHINode."); - BasicBlock *PrevBlock = IncomingBlock->getSinglePredecessor(); + BasicBlock *PrevBlock = nullptr; + // Iterate over the predecessors to the incoming block of the + // PHINode, when we find a block that is not contained in the region + // we know that this is the first block that we split from, and should + // have a valid global value numbering. + for (BasicBlock *Pred : predecessors(IncomingBlock)) + if (!Blocks.contains(Pred)) { + PrevBlock = Pred; + break; + } + assert(PrevBlock && "PrevBlock is nullptr!"); OGVN = Cand.getGVN(PrevBlock); } GVN = OGVN.getValue(); Index: llvm/test/Transforms/IROutliner/no-external-block-entries.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/no-external-block-entries.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; When the an outlined section contains a PHINode no incoming blocks outside +; the region, we need to make sure that we do not try to reassign +; the incoming blocks when splitting and reattaching the region. + +define void @fn1() local_unnamed_addr #0 { +entry: + br label %block_2 + +block_1: + %a = phi i32 [ 0, %block_2], [ 1, %intermediate_block_1 ] + br i1 0, label %block_3, label %block_2 +intermediate_block_1: + br label %block_1 +block_2: + br i1 0, label %block_3, label %block_1 + +block_3: + %b = phi i32 [ 0, %block_2 ], [ 1, %block_1 ] + br label %block_5 + +block_4: + %c = phi i32 [ 0, %block_5 ], [ 1, %intermediate_block_2 ] + br i1 0, label %block_6, label %block_5 +intermediate_block_2: + br label %block_4 +block_5: + br i1 0, label %block_6, label %block_4 + +block_6: + unreachable +} +; CHECK-LABEL: @fn1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[B_CE_LOC]], i32 0) +; CHECK-NEXT: [[B_CE_RELOAD:%.*]] = load i32, i32* [[B_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BLOCK_3:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_CE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @outlined_ir_func_0(i32* null, i32 -1) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: unreachable +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ 0, [[BLOCK_2]] ], [ 1, [[INTERMEDIATE_BLOCK_1:%.*]] ] +; CHECK-NEXT: br i1 false, label [[BLOCK_3_SPLIT:%.*]], label [[BLOCK_2]] +; CHECK: intermediate_block_1: +; CHECK-NEXT: br label [[BLOCK_1:%.*]] +; CHECK: block_2: +; CHECK-NEXT: br i1 false, label [[BLOCK_3_SPLIT]], label [[BLOCK_1]] +; CHECK: block_3.split: +; CHECK-NEXT: [[B_CE:%.*]] = phi i32 [ 0, [[BLOCK_2]] ], [ 1, [[BLOCK_1]] ] +; CHECK-NEXT: br label [[BLOCK_3_EXITSTUB:%.*]] +; CHECK: block_3.exitStub: +; CHECK-NEXT: switch i32 [[TMP1:%.*]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[B_CE]], i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: final_block_0: +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/one-external-incoming-block-phi-node.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/one-external-incoming-block-phi-node.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; When the an outlined section contains a PHINode with an incoming block +; outside of the region, the predecessors to be handled specially to ensure that +; a global value is properly assigned. This ensures that there is no crash +; in that situation. + +define void @fn1() local_unnamed_addr #0 { +entry: + br label %block_1 + +block_1: + %a = phi i32 [ 0, %block_2], [ 1, %entry ] + br i1 0, label %block_3, label %block_2 +block_2: + br i1 0, label %block_3, label %block_1 + +block_3: + %b = phi i32 [ 0, %block_2 ], [ 1, %block_1 ] + br label %block_4 + +block_4: + %c = phi i32 [ 0, %block_5 ], [ 1, %block_3 ] + br i1 0, label %block_6, label %block_5 + +block_5: + br i1 0, label %block_6, label %block_4 + +block_6: + unreachable +} +; CHECK-LABEL: @fn1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[B_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[B_CE_LOC]], i32 0) +; CHECK-NEXT: [[B_CE_RELOAD:%.*]] = load i32, i32* [[B_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BLOCK_3:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_CE_RELOAD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @outlined_ir_func_0(i32* null, i32 -1) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: unreachable +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: br label [[BLOCK_1:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ 0, [[BLOCK_2:%.*]] ], [ 1, [[ENTRY_TO_OUTLINE]] ] +; CHECK-NEXT: br i1 false, label [[BLOCK_3_SPLIT:%.*]], label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: br i1 false, label [[BLOCK_3_SPLIT]], label [[BLOCK_1]] +; CHECK: block_3.split: +; CHECK-NEXT: [[B_CE:%.*]] = phi i32 [ 0, [[BLOCK_2]] ], [ 1, [[BLOCK_1]] ] +; CHECK-NEXT: br label [[BLOCK_3_EXITSTUB:%.*]] +; CHECK: block_3.exitStub: +; CHECK-NEXT: switch i32 [[TMP1:%.*]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[B_CE]], i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: final_block_0: +; CHECK-NEXT: ret void +;