diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1593,15 +1593,41 @@ ExitPhi = PN; } - if (!ExitPhi || ExitPhi->getNumIncomingValues() != 1) - return false; - auto LastLI = dyn_cast(*ExitPhi->user_begin()); + PHINode *Current = ExitPhi; + PHINode *LastPhi = nullptr; + SmallVector Blocks; + while (Current) { + LastPhi = Current; + if (Current->hasOneUse()) { + Blocks.push_back(Current->getParent()); + Current = dyn_cast(*Current->user_begin()); + } else { + return false + } + } + + auto LastLI = dyn_cast(*LastPhi->user_begin()); auto ExitBlock = L->getExitBlock(); - if (!LastLI || ExitPhi->getParent() != LastLI->getParent() || - ExitPhi->getParent() != ExitBlock) + if (!LastLI || LastPhi->getParent() != LastLI->getParent() || + ExitBlock != Blocks[0]) return false; - for (Instruction &I : make_range(ExitBlock->begin(), LastLI->getIterator())) { + // If we have phis across multiple blocks, make sure the blocks only contain + // the phi and an unconditional branch and are connected to the previous + // block. This ensures that the last block in the chain executes if the loop + // exit block executes. + for (unsigned I = 1; I < Blocks.size(); ++I) { + BasicBlock *A = Blocks[I - 1]; + if (&*std::next(A->begin()) != A->getTerminator()) + return false; + auto *Term = dyn_cast(A->getTerminator()); + if (Term->isConditional() || A->getSingleSuccessor() != Blocks[I]) + return false; + } + + // Make sure LastLI is guaranteed to execute on entry to its containing block. + for (Instruction &I : + make_range(Blocks.back()->begin(), LastLI->getIterator())) { if (!isGuaranteedToTransferExecutionToSuccessor(&I) || I.mayWriteToMemory()) return false; } @@ -1652,10 +1678,37 @@ // Finally, replace phi nodes referencing the pointer select with a phi node // for the selected value. - PHINode *ExitValuePhi = PHINode::Create( - LastLI->getType(), 2, LastLI->getName() + ".promoted.load", ExitPhi); - ExitValuePhi->addIncoming(NewSel, ExitPhi->getIncomingBlock(0)); - LastLI->replaceAllUsesWith(ExitValuePhi); + Current = ExitPhi; + DenseMap Old2New; + Old2New[Sel] = NewSel; + while (Current) { + PHINode *ValuePhi = PHINode::Create( + LastLI->getType(), 2, LastLI->getName() + ".promoted.load", Current); + for (unsigned I = 0; I != Current->getNumIncomingValues(); ++I) { + BasicBlock *IncBB = Current->getIncomingBlock(I); + ValuePhi->addIncoming(PoisonValue::get(ValuePhi->getType()), IncBB); + auto Iter = Old2New.find(Current->getIncomingValue(I)); + if (Iter != Old2New.end()) { + ValuePhi->setIncomingValue(I, Iter->second); + } else { + BasicBlock *Split = + SplitEdge(IncBB, Current->getParent(), DT, LI, MSSAU); + auto NewLI = new LoadInst( + LastLI->getType(), Current->getIncomingValue(I), + LastLI->getName() + ".promoted", Split->getTerminator()); + IncBB = Split; + if (MSSAU) { + MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( + NewLI, nullptr, NewLI->getParent(), MemorySSA::End); + MSSAU->insertUse(cast(NewMemAcc), /*RenameUses=*/true); + } + ValuePhi->setIncomingValue(I, NewLI); + } + } + Old2New[Current] = ValuePhi; + Current = dyn_cast(*Current->user_begin()); + } + LastLI->replaceAllUsesWith(Old2New[LastPhi]); markInstructionForDeletion(PhiLoad); return true; } diff --git a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll --- a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll @@ -244,25 +244,30 @@ ; CHECK-LABEL: @test_pointer_phi_select_same_object_split_edge( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1 -; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[ENTRY_EXIT_CRIT_EDGE:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK: entry.exit_crit_edge: +; CHECK-NEXT: [[RES_PROMOTED:%.*]] = load i32, i32* [[END:%.*]], align 4 +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[L_2_PROMOTED:%.*]] = load i32, i32* [[PTR]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: +; CHECK-NEXT: [[L_2_PROMOTED_LOAD:%.*]] = phi i32 [ [[L_2_PROMOTED]], [[LOOP_PREHEADER]] ], [ [[TMP0:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[PTR_IV:%.*]] = phi i32* [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ], [ [[START_PTR]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[MIN_PTR:%.*]] = phi i32* [ [[MIN_SELECT:%.*]], [[LOOP]] ], [ [[PTR]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4 -; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4 -; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]] +; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2_PROMOTED_LOAD]] +; CHECK-NEXT: [[TMP0]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2_PROMOTED_LOAD]] ; CHECK-NEXT: [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]] ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]] +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END]] ; CHECK-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]] ; CHECK: loop.exit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[LCSSA_PHI_2:%.*]] = phi i32* [ [[END]], [[ENTRY:%.*]] ], [ [[MIN_SELECT]], [[LOOP_EXIT]] ] -; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[LCSSA_PHI_2]], align 4 -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: [[RES_PROMOTED_LOAD1:%.*]] = phi i32 [ [[RES_PROMOTED]], [[ENTRY_EXIT_CRIT_EDGE]] ], [ [[TMP0]], [[LOOP_EXIT]] ] +; CHECK-NEXT: [[LCSSA_PHI_2:%.*]] = phi i32* [ [[END]], [[ENTRY_EXIT_CRIT_EDGE]] ], [ [[MIN_SELECT]], [[LOOP_EXIT]] ] +; CHECK-NEXT: ret i32 [[RES_PROMOTED_LOAD1]] ; entry: %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1