Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2070,14 +2070,6 @@ IdxTy): ConstantInt::get(IdxTy, 0); - // We need an instruction to anchor the overflow check on. StartIdx needs to - // be defined before the overflow check branch. Because the scalar preheader - // is going to merge the start index and so the overflow branch block needs to - // contain a definition of the start index. - Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd( - StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor", - BypassBlock->getTerminator()); - // Count holds the overall loop count (N). Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), BypassBlock->getTerminator()); @@ -2085,10 +2077,8 @@ LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. - BasicBlock *VectorPH = - BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); BasicBlock *VecBody = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); + BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = @@ -2103,7 +2093,6 @@ if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, *LI); - ParentLoop->addBasicBlockToLoop(VectorPH, *LI); ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); @@ -2124,6 +2113,21 @@ // This is the IR builder that we use to add all of the logic for bypassing // the new vector loop. IRBuilder<> BypassBuilder(BypassBlock->getTerminator()); + + // Generate code to check that the loops trip count that we computed by adding + // one to the backedge-taken count will not overflow. + + BasicBlock *CheckBlock = BypassBlock->splitBasicBlock( + BypassBlock->getTerminator(), "overflow.checked"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); + + BypassBuilder.SetInsertPoint(BypassBlock->getTerminator()); + BypassBuilder.CreateCondBr(CheckBCOverflow, ScalarPH, CheckBlock); + BypassBlock->getTerminator()->eraseFromParent(); + BypassBuilder.SetInsertPoint(CheckBlock->getTerminator()); + BypassBlock = CheckBlock; + setDebugLocFromInst(BypassBuilder, getDebugLocFromInstOrOperands(OldInduction)); @@ -2153,23 +2157,16 @@ Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); - BasicBlock *LastBypassBlock = BypassBlock; - - // Generate code to check that the loops trip count that we computed by adding - // one to the backedge-taken count will not overflow. - { - auto PastOverflowCheck = - std::next(BasicBlock::iterator(OverflowCheckAnchor)); - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); - Instruction *OldTerm = LastBypassBlock->getTerminator(); - BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm); - OldTerm->eraseFromParent(); - LastBypassBlock = CheckBlock; - } + CheckBlock = BypassBlock->splitBasicBlock( + BypassBlock->getTerminator(), "vector.ph"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); + LoopBypassBlocks.push_back(BypassBlock); + BypassBuilder.SetInsertPoint(BypassBlock->getTerminator()); + BypassBuilder.CreateCondBr(Cmp, MiddleBlock, CheckBlock); + BypassBlock->getTerminator()->eraseFromParent(); + BypassBuilder.SetInsertPoint(CheckBlock->getTerminator()); + BypassBlock = CheckBlock; // Generate the code to check that the strides we assumed to be one are really // one. We want the new basic block to start at the first instruction in a @@ -2177,24 +2174,24 @@ Instruction *StrideCheck; Instruction *FirstCheckInst; std::tie(FirstCheckInst, StrideCheck) = - addStrideCheck(LastBypassBlock->getTerminator()); + addStrideCheck(BypassBlock->getTerminator()); if (StrideCheck) { AddedSafetyChecks = true; // Create a new block containing the stride check. - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); + BypassBlock->setName("vector.stridecheck"); + BasicBlock *CheckBlock = BypassBlock->splitBasicBlock( + BypassBlock->getTerminator(), "vector.ph"); if (ParentLoop) ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); + LoopBypassBlocks.push_back(BypassBlock); // Replace the branch into the memory check block with a conditional branch // for the "few elements case". - Instruction *OldTerm = LastBypassBlock->getTerminator(); - BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm); - OldTerm->eraseFromParent(); - - Cmp = StrideCheck; - LastBypassBlock = CheckBlock; + BypassBuilder.SetInsertPoint(BypassBlock->getTerminator()); + BypassBuilder.CreateCondBr(StrideCheck, MiddleBlock, CheckBlock); + BypassBlock->getTerminator()->eraseFromParent(); + BypassBuilder.SetInsertPoint(CheckBlock->getTerminator()); + BypassBlock = CheckBlock; } // Generate the code that checks in runtime if arrays overlap. We put the @@ -2202,30 +2199,24 @@ // faster. Instruction *MemRuntimeCheck; std::tie(FirstCheckInst, MemRuntimeCheck) = - Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator()); + Legal->getLAI()->addRuntimeCheck(BypassBlock->getTerminator()); if (MemRuntimeCheck) { AddedSafetyChecks = true; - // Create a new block containing the memory check. - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck"); + BypassBlock->setName("vector.memcheck"); + BasicBlock *CheckBlock = BypassBlock->splitBasicBlock( + BypassBlock->getTerminator(), "vector.ph"); + BypassBuilder.SetInsertPoint(CheckBlock->getTerminator()); if (ParentLoop) ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); + LoopBypassBlocks.push_back(BypassBlock); - // Replace the branch into the memory check block with a conditional branch - // for the "few elements case". - Instruction *OldTerm = LastBypassBlock->getTerminator(); - BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm); - OldTerm->eraseFromParent(); - - Cmp = MemRuntimeCheck; - LastBypassBlock = CheckBlock; + BypassBuilder.SetInsertPoint(BypassBlock->getTerminator()); + BypassBuilder.CreateCondBr(MemRuntimeCheck, MiddleBlock, CheckBlock); + BypassBlock->getTerminator()->eraseFromParent(); + BypassBuilder.SetInsertPoint(CheckBlock->getTerminator()); + BypassBlock = CheckBlock; } - LastBypassBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(MiddleBlock, VectorPH, Cmp, - LastBypassBlock); - // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. @@ -2365,7 +2356,7 @@ // Create i+1 and fill the PHINode. Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next"); - Induction->addIncoming(StartIdx, VectorPH); + Induction->addIncoming(StartIdx, BypassBlock); Induction->addIncoming(NextIdx, VecBody); // Create the compare. Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown); @@ -2378,7 +2369,7 @@ Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); // Save the state. - LoopVectorPreHeader = VectorPH; + LoopVectorPreHeader = BypassBlock; LoopScalarPreHeader = ScalarPH; LoopMiddleBlock = MiddleBlock; LoopExitBlock = ExitBlock;