diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -278,13 +278,29 @@
   // We iterate over the instructions in the region, if we find a PHINode, we
   // check if there are predecessors outside of the region, if there are,
   // we ignore this region since we are unable to handle the severing of the
-  // phi node right now.
+  // phi node right now.
+
+  // TODO: Handle extraneous inputs for PHINodes through variable number of
+  // inputs, similar to how outputs are handled.
   BasicBlock::iterator It = StartInst->getIterator();
+  EndBB = BackInst->getParent();
+  BasicBlock *IBlock;
+  bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
   while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
     unsigned NumPredsOutsideRegion = 0;
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!BBSet.contains(PN->getIncomingBlock(i)))
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (!BBSet.contains(PN->getIncomingBlock(i))) {
+        ++NumPredsOutsideRegion;
+        continue;
+      }
+
+      // We must consider the case where the incoming block to the PHINode is
+      // the same as the final block of the OutlinableRegion. If this is the
+      // case, the branch from this block must also be outlined to be valid.
+      IBlock = PN->getIncomingBlock(i);
+      if (IBlock == EndBB && EndBBTermAndBackInstDifferent)
         ++NumPredsOutsideRegion;
+    }
 
     if (NumPredsOutsideRegion > 1)
       return;
@@ -299,11 +315,9 @@
 
   // If the region ends with a PHINode, but does not contain all of the phi node
   // instructions of the region, we ignore it for now.
-  if (isa<PHINode>(BackInst)) {
-    EndBB = BackInst->getParent();
-    if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
-      return;
-  }
+  if (isa<PHINode>(BackInst) &&
+      BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+    return;
 
   // The basic block gets split like so:
   // block: block:
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-parent-block-referential.ll b/llvm/test/Transforms/IROutliner/phi-nodes-parent-block-referential.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-parent-block-referential.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not outline when all of the phi nodes in the beginning
+; block are not included in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  %4 = sub i32 %c, %c
+  br i1 true, label %first, label %test1
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %y, %entry ], [ %e, %test1 ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  %4 = mul i32 %c, %c
+  br i1 true, label %first, label %test1
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT: [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT: br label [[TEST1:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT: ret void
+; CHECK: test1:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[E_RELOAD]], [[TEST1]] ], [ [[Y]], [[ENTRY]] ]
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT: [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C]], [[C]]
+; CHECK-NEXT: br i1 true, label [[FIRST:%.*]], label [[TEST1]]
+; CHECK: first:
+; CHECK-NEXT: ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT: [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT: br label [[TEST1:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT: ret void
+; CHECK: test1:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[Y]], [[ENTRY]] ], [ [[E_RELOAD]], [[TEST1]] ]
+; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT: [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT: br i1 true, label [[FIRST:%.*]], label [[TEST1]]
+; CHECK: first:
+; CHECK-NEXT: ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT: newFuncRoot:
+; CHECK-NEXT: br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK: test1_to_outline:
+; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT: br label [[TEST1_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK: test1_after_outline.exitStub:
+; CHECK-NEXT: store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT: ret void
+;