diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -311,9 +311,9 @@ /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue /// vectorization re-using plans for both the main and epilogue vector loops. /// It should be removed once the re-use issue has been fixed. - void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, - InnerLoopVectorizer &LB, DominatorTree *DT, - bool IsEpilogueVectorization); + VPTransformState executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, + InnerLoopVectorizer &LB, DominatorTree *DT, + bool IsEpilogueVectorization); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printPlans(raw_ostream &O); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -497,8 +497,11 @@ /// loop and the start value for the canonical induction, if it is != 0. The /// latter is the case when vectorizing the epilogue loop. In the case of /// epilogue vectorization, this function is overriden to handle the more - /// complex control flow around the loops. - virtual std::pair createVectorizedLoopSkeleton(); + /// complex control flow around the loops. \p Plan is the VPlan the skeleton + /// is created for and together with \p State it is used to look up properties + /// needed for skeleton creation, like SCEV expansions. + virtual std::pair + createVectorizedLoopSkeleton(VPlan &Plan, VPTransformState &State); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. void fixVectorizedLoop(VPTransformState &State, VPlan &Plan); @@ -560,7 +563,7 @@ /// bypass block, the \p AdditionalBypass pair provides information about the /// bypass block and the end value on the edge from bypass to this loop. PHINode *createInductionResumeValue( - PHINode *OrigPhi, const InductionDescriptor &ID, + PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step, ArrayRef BypassBlocks, std::pair AdditionalBypass = {nullptr, nullptr}); @@ -646,6 +649,7 @@ /// block, the \p AdditionalBypass pair provides information about the bypass /// block and the end value on the edge from bypass to this loop. void createInductionResumeValues( + VPlan &Plan, VPTransformState &State, std::pair AdditionalBypass = {nullptr, nullptr}); /// Complete the loop skeleton by adding debug MDs, creating appropriate @@ -835,15 +839,17 @@ // Override this function to handle the more complex control flow around the // three loops. - std::pair createVectorizedLoopSkeleton() final { - return createEpilogueVectorizedLoopSkeleton(); + std::pair + createVectorizedLoopSkeleton(VPlan &Plan, VPTransformState &State) final { + return createEpilogueVectorizedLoopSkeleton(Plan, State); } /// The interface for creating a vectorized skeleton using one of two /// different strategies, each corresponding to one execution of the vplan /// as described above. virtual std::pair - createEpilogueVectorizedLoopSkeleton() = 0; + createEpilogueVectorizedLoopSkeleton(VPlan &Plan, + VPTransformState &State) = 0; /// Holds and updates state information required to vectorize the main loop /// and its epilogue in two separate passes. This setup helps us avoid @@ -871,7 +877,9 @@ EPI, LVL, CM, BFI, PSI, Check) {} /// Implements the interface for creating a vectorized skeleton using the /// *main loop* strategy (ie the first pass of vplan execution). - std::pair createEpilogueVectorizedLoopSkeleton() final; + std::pair + createEpilogueVectorizedLoopSkeleton(VPlan &Plan, + VPTransformState &State) final; protected: /// Emits an iteration count bypass check once for the main loop (when \p @@ -901,7 +909,9 @@ } /// Implements the interface for creating a vectorized skeleton using the /// *epilogue loop* strategy (ie the second pass of vplan execution). - std::pair createEpilogueVectorizedLoopSkeleton() final; + std::pair + createEpilogueVectorizedLoopSkeleton(VPlan &Plan, + VPTransformState &State) final; protected: /// Emits an iteration count bypass check after the main vector loop has @@ -2418,21 +2428,6 @@ } } -// Generate code for the induction step. Note that induction steps are -// required to be loop-invariant -static Value *CreateStepValue(const SCEV *Step, ScalarEvolution &SE, - Instruction *InsertBefore, - Loop *OrigLoop = nullptr) { - const DataLayout &DL = SE.getDataLayout(); - assert((!OrigLoop || SE.isLoopInvariant(Step, OrigLoop)) && - "Induction step should be loop invariant"); - if (auto *E = dyn_cast(Step)) - return E->getValue(); - - SCEVExpander Exp(SE, DL, "induction"); - return Exp.expandCodeFor(Step, Step->getType(), InsertBefore); -} - /// Compute the transformed value of Index at offset StartValue using step /// StepValue. /// For integer induction, returns StartValue + Index * StepValue. @@ -3136,7 +3131,7 @@ } PHINode *InnerLoopVectorizer::createInductionResumeValue( - PHINode *OrigPhi, const InductionDescriptor &II, + PHINode *OrigPhi, const InductionDescriptor &II, Value *Step, ArrayRef BypassBlocks, std::pair AdditionalBypass) { Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); @@ -3155,8 +3150,6 @@ if (II.getInductionBinOp() && isa(II.getInductionBinOp())) B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); - Value *Step = - CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint()); EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(), Step, II); EndValue->setName("ind.end"); @@ -3164,8 +3157,6 @@ // Compute the end value for the additional bypass (if applicable). if (AdditionalBypass.first) { B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt())); - Value *Step = - CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint()); EndValueFromAdditionalBypass = emitTransformedIndex( B, AdditionalBypass.second, II.getStartValue(), Step, II); EndValueFromAdditionalBypass->setName("ind.end"); @@ -3195,6 +3186,7 @@ } void InnerLoopVectorizer::createInductionResumeValues( + VPlan &Plan, VPTransformState &State, std::pair AdditionalBypass) { assert(((AdditionalBypass.first && AdditionalBypass.second) || (!AdditionalBypass.first && !AdditionalBypass.second)) && @@ -3209,8 +3201,11 @@ for (const auto &InductionEntry : Legal->getInductionVars()) { PHINode *OrigPhi = InductionEntry.first; const InductionDescriptor &II = InductionEntry.second; + VPValue *StepSCEV = Plan.getSCEVExpansion(II.getStep()); + Value *Step = StepSCEV->getDefiningRecipe() ? State.get(StepSCEV, {0, 0}) + : StepSCEV->getLiveInIRValue(); PHINode *BCResumeVal = createInductionResumeValue( - OrigPhi, II, LoopBypassBlocks, AdditionalBypass); + OrigPhi, II, Step, LoopBypassBlocks, AdditionalBypass); OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal); } } @@ -3251,7 +3246,8 @@ } std::pair -InnerLoopVectorizer::createVectorizedLoopSkeleton() { +InnerLoopVectorizer::createVectorizedLoopSkeleton(VPlan &Plan, + VPTransformState &State) { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the @@ -3306,7 +3302,7 @@ emitMemRuntimeChecks(LoopScalarPreHeader); // Emit phis for the new starting index of the scalar loop. - createInductionResumeValues(); + createInductionResumeValues(Plan, State); return {completeLoopSkeleton(), nullptr}; } @@ -7668,11 +7664,9 @@ } } -void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, - VPlan &BestVPlan, - InnerLoopVectorizer &ILV, - DominatorTree *DT, - bool IsEpilogueVectorization) { +VPTransformState LoopVectorizationPlanner::executePlan( + ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, + InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization) { assert(BestVPlan.hasVF(BestVF) && "Trying to execute plan with unsupported VF"); assert(BestVPlan.hasUF(BestUF) && @@ -7704,7 +7698,7 @@ // middle block. The vector loop is created during VPlan execution. Value *CanonicalIVStartValue; std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = - ILV.createVectorizedLoopSkeleton(); + ILV.createVectorizedLoopSkeleton(BestVPlan, State); // Only use noalias metadata when using memory checks guaranteeing no overlap // across all iterations. @@ -7772,6 +7766,8 @@ ILV.fixVectorizedLoop(State, BestVPlan); ILV.printDebugTracesAtEnd(); + + return State; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -7793,7 +7789,8 @@ /// This function is partially responsible for generating the control flow /// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization. std::pair -EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { +EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton( + VPlan &Plan, VPTransformState &State) { createVectorLoopSkeleton(""); // Generate the code to check the minimum iteration count of the vector @@ -7911,7 +7908,8 @@ /// This function is partially responsible for generating the control flow /// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization. std::pair -EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { +EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( + VPlan &Plan, VPTransformState &State) { createVectorLoopSkeleton("vec.epilog."); // Now, compare the remaining count and if there aren't enough iterations to @@ -8009,7 +8007,8 @@ // check, then the resume value for the induction variable comes from // the trip count of the main vector loop, hence passing the AdditionalBypass // argument. - createInductionResumeValues({VecEpilogueIterationCountCheck, + createInductionResumeValues(Plan, State, + {VecEpilogueIterationCountCheck, EPI.VectorTripCount} /* AdditionalBypass */); return {completeLoopSkeleton(), EPResumeVal}; @@ -8893,6 +8892,11 @@ VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), *PSE.getSE()); + + for (const auto &[_, II] : Legal->getInductionVars()) { + vputils::getOrCreateVPValueForSCEVExpr(*Plan, II.getStep(), *PSE.getSE()); + } + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); @@ -10382,8 +10386,8 @@ EPI, &LVL, &CM, BFI, PSI, Checks); VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF); - LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, - DT, true); + auto State = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, + BestMainPlan, MainILV, DT, true); ++LoopsVectorized; // Second pass vectorizes the epilogue and adjusts the control flow @@ -10436,8 +10440,12 @@ ID = &WidenInd->getInductionDescriptor(); } + VPValue *StepSCEV = BestMainPlan.getSCEVExpansion(ID->getStep()); + Value *Step = StepSCEV->getDefiningRecipe() + ? State.get(StepSCEV, {0, 0}) + : StepSCEV->getLiveInIRValue(); ResumeV = MainILV.createInductionResumeValue( - IndPhi, *ID, {EPI.MainLoopIterationCountCheck}); + IndPhi, *ID, Step, {EPI.MainLoopIterationCountCheck}); } assert(ResumeV && "Must have a resume value"); VPValue *StartVal = BestEpiPlan.getVPValueOrAddLiveIn(ResumeV); diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -29,7 +29,6 @@ ; CHECK: L1.early.exit: ; CHECK-NEXT: ret void ; CHECK: L1.exit: -; CHECK-NEXT: [[INDUCTION_IV_LCSSA2:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: [[INDUCTION_IV_LCSSA1:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: [[L1_EXIT_VAL:%.*]] = phi i32 [ [[L1_SUM_NEXT]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: br label [[L2_HEADER:%.*]] @@ -45,7 +44,7 @@ ; CHECK: L2.Inner.header.preheader: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA1]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 1, [[TMP3]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -58,11 +57,11 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[L2_HEADER_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 13, [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 13, [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label [[L2_INNER_HEADER:%.*]] ; CHECK: L2.Inner.header: ; CHECK-NEXT: [[L2_ACCUM:%.*]] = phi i32 [ [[L2_ACCUM_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[L2_ACCUM_NEXT]] = sub i32 [[L2_ACCUM]], [[L1_EXIT_VAL]] ; CHECK-NEXT: [[L2_DUMMY_BUT_NEED_IT:%.*]] = sext i32 [[L2_ACCUM_NEXT]] to i64 ; CHECK-NEXT: [[L2_IV_NEXT]] = add nuw nsw i64 [[L2_IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll @@ -27,9 +27,8 @@ ; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 ; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: -; STRIDED-NEXT: [[TMP2:%.*]] = sext i32 [[MUL]] to i64 -; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 4294967264, [[TMP2]] -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]] +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -233,9 +233,8 @@ ; STRIDED: vector.scevcheck: ; STRIDED-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: -; STRIDED-NEXT: [[TMP2:%.*]] = sext i32 [[MUL]] to i64 -; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 4294967264, [[TMP2]] -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]] +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -19,12 +19,11 @@ ; CHECK-NEXT: [[INDUCTION_IV_NEXT]] = add i32 [[INDUCTION_IV]], [[TMP1]] ; CHECK-NEXT: br i1 false, label [[LOOP_1]], label [[LOOP_2_PREHEADER:%.*]] ; CHECK: loop.2.preheader: -; CHECK-NEXT: [[INDUCTION_IV_LCSSA1:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] ; CHECK-NEXT: [[INDUCTION_IV_LCSSA:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] ; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i32 [ [[IV_1]], [[LOOP_1]] ] ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA1]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -46,11 +45,11 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV_3:%.*]] = phi i16 [ [[IV_3_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ [[IV_4_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ [[IV_4_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_4_NEXT]] = sub i32 [[IV_4]], [[IV_1_LCSSA]] ; CHECK-NEXT: [[IV_3_NEXT]] = add i16 [[IV_3]], 1 ; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_3]], 198 @@ -103,3 +102,187 @@ unreachable.bb: ; No predecessors! br label %loop.1.preheader } + +define void @test2_pr58811() { +; CHECK-LABEL: @test2_pr58811( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[INVOKE_CONT27:%.*]] +; CHECK: invoke.cont27.loopexit: +; CHECK-NEXT: [[SUB93_2_LCSSA:%.*]] = phi i32 [ [[SUB93_2:%.*]], [[INVOKE_CONT99_2:%.*]] ] +; CHECK-NEXT: br label [[INVOKE_CONT27]] +; CHECK: invoke.cont27: +; CHECK-NEXT: [[UINT32_TVAR_174_0752:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB93_2_LCSSA]], [[INVOKE_CONT27_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[UINT32_TVAR_174_0752]], -1 +; CHECK-NEXT: br label [[INVOKE_CONT99:%.*]] +; CHECK: invoke.cont99: +; CHECK-NEXT: [[INDUCTION_IV:%.*]] = phi i32 [ [[INDUCTION_IV_NEXT:%.*]], [[INVOKE_CONT99]] ], [ [[TMP0]], [[INVOKE_CONT27]] ] +; CHECK-NEXT: [[UINT32_TVAR_174_2746:%.*]] = phi i32 [ [[UINT32_TVAR_174_0752]], [[INVOKE_CONT27]] ], [ [[ADD101:%.*]], [[INVOKE_CONT99]] ] +; CHECK-NEXT: [[UINT32_TVAR_177_2745:%.*]] = phi i32 [ 0, [[INVOKE_CONT27]] ], [ [[SUB93:%.*]], [[INVOKE_CONT99]] ] +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[UINT32_TVAR_177_2745]], -1 +; CHECK-NEXT: [[SUB93]] = add i32 [[UINT32_TVAR_177_2745]], 1 +; CHECK-NEXT: [[ADD101]] = add i32 [[UINT32_TVAR_177_2745]], [[UINT32_TVAR_174_2746]] +; CHECK-NEXT: [[INDUCTION_IV_NEXT]] = add i32 [[INDUCTION_IV]], [[TMP1]] +; CHECK-NEXT: br i1 false, label [[INVOKE_CONT99]], label [[INVOKE_CONT99_1_PREHEADER:%.*]] +; CHECK: invoke.cont99.1.preheader: +; CHECK-NEXT: [[UINT32_TVAR_174_2746_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_174_2746]], [[INVOKE_CONT99]] ] +; CHECK-NEXT: [[INDUCTION_IV_LCSSA:%.*]] = phi i32 [ [[INDUCTION_IV]], [[INVOKE_CONT99]] ] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 199, 196 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[INVOKE_CONT99_2_PREHEADER:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[INVOKE_CONT99_1_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[INVOKE_CONT99_1_PREHEADER]] ] +; CHECK-NEXT: br label [[INVOKE_CONT99_1:%.*]] +; CHECK: invoke.cont99.1: +; CHECK-NEXT: [[INT16_TINDARRAYSAFEVAR_186_0747_1:%.*]] = phi i16 [ [[INC_1:%.*]], [[INVOKE_CONT99_1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[UINT32_TVAR_177_2745_1:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[INVOKE_CONT99_1]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUB93_1]] = sub i32 [[UINT32_TVAR_177_2745_1]], [[UINT32_TVAR_174_2746_LCSSA]] +; CHECK-NEXT: [[INC_1]] = add i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 1 +; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 198 +; CHECK-NEXT: br i1 [[CMP88_1]], label [[INVOKE_CONT99_1]], label [[INVOKE_CONT99_2_PREHEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: invoke.cont99.2.preheader: +; CHECK-NEXT: [[UINT32_TVAR_177_2745_1_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_177_2745_1]], [[INVOKE_CONT99_1]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[INVOKE_CONT99_2]] +; CHECK: invoke.cont99.2: +; CHECK-NEXT: [[UINT32_TVAR_177_2745_2:%.*]] = phi i32 [ [[SUB93_2]], [[INVOKE_CONT99_2]] ], [ 0, [[INVOKE_CONT99_2_PREHEADER]] ] +; CHECK-NEXT: [[SUB93_2]] = sub i32 [[UINT32_TVAR_177_2745_2]], [[UINT32_TVAR_177_2745_1_LCSSA]] +; CHECK-NEXT: br i1 false, label [[INVOKE_CONT99_2]], label [[INVOKE_CONT27_LOOPEXIT]] +; +entry: + br label %invoke.cont27 + +invoke.cont27: ; preds = %invoke.cont99.2, %entry + %uint32_tVar_174.0752 = phi i32 [ 0, %entry ], [ %sub93.2, %invoke.cont99.2 ] + br label %invoke.cont99 + +invoke.cont99: ; preds = %invoke.cont99, %invoke.cont27 + %uint32_tVar_174.2746 = phi i32 [ %uint32_tVar_174.0752, %invoke.cont27 ], [ %add101, %invoke.cont99 ] + %uint32_tVar_177.2745 = phi i32 [ 0, %invoke.cont27 ], [ %sub93, %invoke.cont99 ] + %sub93 = add i32 %uint32_tVar_177.2745, 1 + %add101 = add i32 %uint32_tVar_177.2745, %uint32_tVar_174.2746 + br i1 false, label %invoke.cont99, label %invoke.cont99.1 + +invoke.cont99.1: ; preds = %invoke.cont99.1, %invoke.cont99 + %int16_tIndArraySafeVar_186.0747.1 = phi i16 [ %inc.1, %invoke.cont99.1 ], [ 0, %invoke.cont99 ] + %uint32_tVar_177.2745.1 = phi i32 [ %sub93.1, %invoke.cont99.1 ], [ 0, %invoke.cont99 ] + %sub93.1 = sub i32 %uint32_tVar_177.2745.1, %uint32_tVar_174.2746 + %inc.1 = add i16 %int16_tIndArraySafeVar_186.0747.1, 1 + %cmp88.1 = icmp ult i16 %int16_tIndArraySafeVar_186.0747.1, 198 + br i1 %cmp88.1, label %invoke.cont99.1, label %invoke.cont99.2 + +invoke.cont99.2: ; preds = %invoke.cont99.2, %invoke.cont99.1 + %uint32_tVar_177.2745.2 = phi i32 [ %sub93.2, %invoke.cont99.2 ], [ 0, %invoke.cont99.1 ] + %sub93.2 = sub i32 %uint32_tVar_177.2745.2, %uint32_tVar_177.2745.1 + br i1 false, label %invoke.cont99.2, label %invoke.cont27 +} + +define void @test3_pr58811() { +; CHECK-LABEL: @test3_pr58811( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[INVOKE_CONT27:%.*]] +; CHECK: invoke.cont27: +; CHECK-NEXT: [[UINT32_TVAR_174_0752:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB93_2:%.*]], [[INVOKE_CONT120_2:%.*]] ] +; CHECK-NEXT: [[REM85:%.*]] = urem i32 1, [[UINT32_TVAR_174_0752]] +; CHECK-NEXT: br label [[INVOKE_CONT99:%.*]] +; CHECK: invoke.cont99: +; CHECK-NEXT: [[UINT32_TVAR_174_2746:%.*]] = phi i32 [ 1, [[INVOKE_CONT27]] ], [ 0, [[INVOKE_CONT99]] ] +; CHECK-NEXT: [[ADD101:%.*]] = add i32 [[REM85]], [[UINT32_TVAR_174_2746]] +; CHECK-NEXT: br i1 false, label [[INVOKE_CONT99]], label [[INVOKE_CONT99_1_PREHEADER:%.*]] +; CHECK: invoke.cont99.1.preheader: +; CHECK-NEXT: [[UINT32_TVAR_174_2746_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_174_2746]], [[INVOKE_CONT99]] ] +; CHECK-NEXT: [[ADD101_LCSSA:%.*]] = phi i32 [ [[ADD101]], [[INVOKE_CONT99]] ] +; CHECK-NEXT: [[TMP0:%.*]] = udiv i32 1, [[UINT32_TVAR_174_0752]] +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[UINT32_TVAR_174_0752]], [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], [[UINT32_TVAR_174_2746_LCSSA]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[TMP3]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 0, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 1, [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 2, [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 3, [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], [[TMP10]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 199, 196 +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[TMP3]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[INVOKE_CONT99_2_PREHEADER:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[INVOKE_CONT99_1_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[INVOKE_CONT99_1_PREHEADER]] ] +; CHECK-NEXT: br label [[INVOKE_CONT99_1:%.*]] +; CHECK: invoke.cont99.1: +; CHECK-NEXT: [[INT16_TINDARRAYSAFEVAR_186_0747_1:%.*]] = phi i16 [ [[INC_1:%.*]], [[INVOKE_CONT99_1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[UINT32_TVAR_177_2745_1:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[INVOKE_CONT99_1]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUB93_1]] = sub i32 [[UINT32_TVAR_177_2745_1]], [[ADD101_LCSSA]] +; CHECK-NEXT: [[INC_1]] = add i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 1 +; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 198 +; CHECK-NEXT: br i1 [[CMP88_1]], label [[INVOKE_CONT99_1]], label [[INVOKE_CONT99_2_PREHEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: invoke.cont99.2.preheader: +; CHECK-NEXT: [[UINT32_TVAR_177_2745_1_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_177_2745_1]], [[INVOKE_CONT99_1]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[INVOKE_CONT99_2:%.*]] +; CHECK: invoke.cont99.2: +; CHECK-NEXT: [[UINT32_TVAR_177_2745_2:%.*]] = phi i32 [ [[SUB93_2]], [[INVOKE_CONT99_2]] ], [ 0, [[INVOKE_CONT99_2_PREHEADER]] ] +; CHECK-NEXT: [[SUB93_2]] = sub i32 [[UINT32_TVAR_177_2745_2]], [[UINT32_TVAR_177_2745_1_LCSSA]] +; CHECK-NEXT: br label [[INVOKE_CONT99_2]] +; CHECK: invoke.cont120.2: +; CHECK-NEXT: br label [[INVOKE_CONT27]] +; +entry: + br label %invoke.cont27 + +invoke.cont27: ; preds = %invoke.cont120.2, %entry + %uint32_tVar_174.0752 = phi i32 [ 0, %entry ], [ %sub93.2, %invoke.cont120.2 ] + %rem85 = urem i32 1, %uint32_tVar_174.0752 + br label %invoke.cont99 + +invoke.cont99: ; preds = %invoke.cont99, %invoke.cont27 + %uint32_tVar_174.2746 = phi i32 [ 1, %invoke.cont27 ], [ 0, %invoke.cont99 ] + %add101 = add i32 %rem85, %uint32_tVar_174.2746 + br i1 false, label %invoke.cont99, label %invoke.cont99.1 + +invoke.cont99.1: ; preds = %invoke.cont99.1, %invoke.cont99 + %int16_tIndArraySafeVar_186.0747.1 = phi i16 [ %inc.1, %invoke.cont99.1 ], [ 0, %invoke.cont99 ] + %uint32_tVar_177.2745.1 = phi i32 [ %sub93.1, %invoke.cont99.1 ], [ 0, %invoke.cont99 ] + %sub93.1 = sub i32 %uint32_tVar_177.2745.1, %add101 + %inc.1 = add i16 %int16_tIndArraySafeVar_186.0747.1, 1 + %cmp88.1 = icmp ult i16 %int16_tIndArraySafeVar_186.0747.1, 198 + br i1 %cmp88.1, label %invoke.cont99.1, label %invoke.cont99.2 + +invoke.cont99.2: ; preds = %invoke.cont99.2, %invoke.cont99.1 + %uint32_tVar_177.2745.2 = phi i32 [ %sub93.2, %invoke.cont99.2 ], [ 0, %invoke.cont99.1 ] + %sub93.2 = sub i32 %uint32_tVar_177.2745.2, %uint32_tVar_177.2745.1 + br label %invoke.cont99.2 + +invoke.cont120.2: ; No predecessors! + br label %invoke.cont27 +}