diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1912,6 +1912,9 @@ /// vectorize, the checks are moved back. If deciding not to vectorize, the /// temporary blocks are completely removed. class GeneratedRTChecks { + PredicatedScalarEvolution &PSE; + LoopVectorizationLegality &LVL; + /// Basic block which contains the generated SCEV checks, if any. BasicBlock *SCEVCheckBlock = nullptr; @@ -1929,84 +1932,21 @@ DominatorTree *DT; LoopInfo *LI; + Loop *L; SCEVExpander SCEVExp; SCEVExpander MemCheckExp; -public: - GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI, - const DataLayout &DL) - : DT(DT), LI(LI), SCEVExp(SE, DL, "scev.check"), - MemCheckExp(SE, DL, "scev.check") {} - - /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can - /// accurately estimate the cost of the runtime checks. The blocks are - /// un-linked from the IR and is added back during vector code generation. If - /// there is no vector code generation, the check blocks are removed - /// completely. - void Create(Loop *L, const LoopAccessInfo &LAI, - const SCEVUnionPredicate &UnionPred) { - - BasicBlock *LoopHeader = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); - - // Use SplitBlock to create blocks for SCEV & memory runtime checks to - // ensure the blocks are properly added to LoopInfo & DominatorTree. Those - // may be used by SCEVExpander. The blocks will be un-linked from their - // predecessors and removed from LI & DT at the end of the function. 
- if (!UnionPred.isAlwaysTrue()) { - SCEVCheckBlock = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI, - nullptr, "vector.scevcheck"); - - SCEVCheckCond = SCEVExp.expandCodeForPredicate( - &UnionPred, SCEVCheckBlock->getTerminator()); - } - - const auto &RtPtrChecking = *LAI.getRuntimePointerChecking(); - if (RtPtrChecking.Need) { - auto *Pred = SCEVCheckBlock ? SCEVCheckBlock : Preheader; - MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr, - "vector.memcheck"); - - std::tie(std::ignore, MemRuntimeCheckCond) = - addRuntimeChecks(MemCheckBlock->getTerminator(), L, - RtPtrChecking.getChecks(), MemCheckExp); - assert(MemRuntimeCheckCond && - "no RT checks generated although RtPtrChecking " - "claimed checks are required"); - } + /// True once generateChecks() has executed; ensures the runtime-check blocks are created at most once. + bool IsChecksGenerated = false; - if (!MemCheckBlock && !SCEVCheckBlock) - return; - - // Unhook the temporary block with the checks, update various places - // accordingly. - if (SCEVCheckBlock) - SCEVCheckBlock->replaceAllUsesWith(Preheader); - if (MemCheckBlock) - MemCheckBlock->replaceAllUsesWith(Preheader); - - if (SCEVCheckBlock) { - SCEVCheckBlock->getTerminator()->moveBefore(Preheader->getTerminator()); - new UnreachableInst(Preheader->getContext(), SCEVCheckBlock); - Preheader->getTerminator()->eraseFromParent(); - } - if (MemCheckBlock) { - MemCheckBlock->getTerminator()->moveBefore(Preheader->getTerminator()); - new UnreachableInst(Preheader->getContext(), MemCheckBlock); - Preheader->getTerminator()->eraseFromParent(); - } - - DT->changeImmediateDominator(LoopHeader, Preheader); - if (MemCheckBlock) { - DT->eraseNode(MemCheckBlock); - LI->removeBlock(MemCheckBlock); - } - if (SCEVCheckBlock) { - DT->eraseNode(SCEVCheckBlock); - LI->removeBlock(SCEVCheckBlock); - } - } +public: + GeneratedRTChecks(PredicatedScalarEvolution &PSE, + LoopVectorizationLegality &LVL, DominatorTree *DT, + LoopInfo *LI, Loop *L, const DataLayout &DL) + : PSE(PSE), LVL(LVL), DT(DT), LI(LI), L(L), + 
SCEVExp(*PSE.getSE(), DL, "scev.check"), + MemCheckExp(*PSE.getSE(), DL, "scev.check") {} /// Remove the created SCEV & memory runtime check blocks & instructions, if /// unused. @@ -2046,6 +1986,8 @@ BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass, BasicBlock *LoopVectorPreHeader, BasicBlock *LoopExitBlock) { + generateChecks(); + if (!SCEVCheckCond) return nullptr; if (auto *C = dyn_cast(SCEVCheckCond)) @@ -2080,6 +2022,8 @@ /// the generated condition. BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass, BasicBlock *LoopVectorPreHeader) { + generateChecks(); + // Check if we generated code that checks in runtime if arrays overlap. if (!MemRuntimeCheckCond) return nullptr; @@ -2105,6 +2049,82 @@ MemRuntimeCheckCond = nullptr; return MemCheckBlock; } + +private: + /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can + /// accurately estimate the cost of the runtime checks. The blocks are + /// un-linked from the IR and is added back during vector code generation. If + /// there is no vector code generation, the check blocks are removed + /// completely. + void generateChecks() { + + if (IsChecksGenerated) + return; + + IsChecksGenerated = true; + + BasicBlock *LoopHeader = L->getHeader(); + BasicBlock *Preheader = L->getLoopPreheader(); + auto UnionPred = PSE.getUnionPredicate(); + + // Use SplitBlock to create blocks for SCEV & memory runtime checks to + // ensure the blocks are properly added to LoopInfo & DominatorTree. Those + // may be used by SCEVExpander. The blocks will be un-linked from their + // predecessors and removed from LI & DT at the end of the function. + if (!UnionPred.isAlwaysTrue()) { + SCEVCheckBlock = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI, + nullptr, "vector.scevcheck"); + + SCEVCheckCond = SCEVExp.expandCodeForPredicate( + &UnionPred, SCEVCheckBlock->getTerminator()); + } + + const auto *RtPtrChecking = + LVL.getLAI() ? 
LVL.getLAI()->getRuntimePointerChecking() : nullptr; + if (RtPtrChecking && RtPtrChecking->Need) { + auto *Pred = SCEVCheckBlock ? SCEVCheckBlock : Preheader; + MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr, + "vector.memcheck"); + + std::tie(std::ignore, MemRuntimeCheckCond) = + addRuntimeChecks(MemCheckBlock->getTerminator(), L, + RtPtrChecking->getChecks(), MemCheckExp); + assert(MemRuntimeCheckCond && + "no RT checks generated although RtPtrChecking " + "claimed checks are required"); + } + + if (!MemCheckBlock && !SCEVCheckBlock) + return; + + // Unhook the temporary block with the checks, update various places + // accordingly. + if (SCEVCheckBlock) + SCEVCheckBlock->replaceAllUsesWith(Preheader); + if (MemCheckBlock) + MemCheckBlock->replaceAllUsesWith(Preheader); + + if (SCEVCheckBlock) { + SCEVCheckBlock->getTerminator()->moveBefore(Preheader->getTerminator()); + new UnreachableInst(Preheader->getContext(), SCEVCheckBlock); + Preheader->getTerminator()->eraseFromParent(); + } + if (MemCheckBlock) { + MemCheckBlock->getTerminator()->moveBefore(Preheader->getTerminator()); + new UnreachableInst(Preheader->getContext(), MemCheckBlock); + Preheader->getTerminator()->eraseFromParent(); + } + + DT->changeImmediateDominator(LoopHeader, Preheader); + if (MemCheckBlock) { + DT->eraseNode(MemCheckBlock); + LI->removeBlock(MemCheckBlock); + } + if (SCEVCheckBlock) { + DT->eraseNode(SCEVCheckBlock); + LI->removeBlock(SCEVCheckBlock); + } + } }; // Return true if \p OuterLp is an outer loop annotated with hints for explicit @@ -10029,6 +10049,8 @@ ScalarEpilogueLowering SEL = getScalarEpilogueLowering( F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL); + GeneratedRTChecks RTChecks(PSE, *LVL, DT, LI, L, + F->getParent()->getDataLayout()); LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); // Use the planner for outer loop vectorization. 
@@ -10055,10 +10077,8 @@ LVP.setBestPlan(VF.Width, 1); { - GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, - F->getParent()->getDataLayout()); InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL, - &CM, BFI, PSI, Checks); + &CM, BFI, PSI, RTChecks); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.executePlan(LB, DT); @@ -10265,6 +10285,8 @@ IAI.analyzeInterleaving(useMaskedInterleavedAccesses(*TTI)); } + GeneratedRTChecks RTChecks(PSE, LVL, DT, LI, L, + F->getParent()->getDataLayout()); // Use the cost model. LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); @@ -10376,13 +10398,6 @@ bool DisableRuntimeUnroll = false; MDNode *OrigLoopID = L->getLoopID(); { - // Optimistically generate runtime checks. Drop them if they turn out to not - // be profitable. Limit the scope of Checks, so the cleanup happens - // immediately after vector codegeneration is done. - GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, - F->getParent()->getDataLayout()); - if (!VF.Width.isScalar() || IC > 1) - Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); LVP.setBestPlan(VF.Width, IC); using namespace ore; @@ -10391,7 +10406,7 @@ // If we decided that it is not legal to vectorize the loop, then // interleave it. 
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, - &CM, BFI, PSI, Checks); + &CM, BFI, PSI, RTChecks); LVP.executePlan(Unroller, DT); ORE->emit([&]() { @@ -10415,7 +10430,7 @@ EpilogueVF.Width.getKnownMinValue(), 1); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, &LVL, &CM, BFI, PSI, Checks); + EPI, &LVL, &CM, BFI, PSI, RTChecks); LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF); LVP.executePlan(MainILV, DT); @@ -10431,7 +10446,7 @@ EPI.MainLoopUF = EPI.EpilogueUF; EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, - Checks); + RTChecks); LVP.executePlan(EpilogILV, DT); ++LoopsEpilogueVectorized; @@ -10439,7 +10454,7 @@ DisableRuntimeUnroll = true; } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, - &LVL, &CM, BFI, PSI, Checks); + &LVL, &CM, BFI, PSI, RTChecks); LVP.executePlan(LB, DT); ++LoopsVectorized; diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll --- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -26,9 +26,9 @@ ; CHECK-NEXT: br i1 [[CMP27]], label [[FOR_BODY3_LR_PH_US_PREHEADER:%.*]], label [[FOR_END15:%.*]] ; CHECK: for.body3.lr.ph.us.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[K:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY3_LR_PH_US:%.*]] ; CHECK: for.end.us: ; CHECK-NEXT: [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 
[[INDVARS_IV33:%.*]] @@ -38,7 +38,7 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.body3.us: ; CHECK-NEXT: [[INDVARS_IV29:%.*]] = phi i64 [ [[BC_RESUME_VAL:%.*]], [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 @@ -51,15 +51,15 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop !3 +; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], [[INDVARS_IV33]] ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 ; CHECK-NEXT: [[ADD_US]] = add i32 [[TMP9]], [[K]] ; CHECK-NEXT: [[ARRAYIDX7_US]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV33]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: 
[[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]]) @@ -74,8 +74,8 @@ ; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] ; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -98,9 +98,9 @@ ; CHECK-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ] @@ -168,7 +168,7 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32 ; CHECK-NEXT: [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop !2 +; CHECK-NEXT: br i1 [[EXITCOND36]], label [[FOR_END15_LOOPEXIT:%.*]], label [[FOR_BODY3_LR_PH_US]], !llvm.loop [[LOOP2]] ; CHECK: for.body3.us: ; CHECK-NEXT: 
[[INDVARS_IV29:%.*]] = phi i64 [ 0, [[FOR_BODY3_LR_PH_US]] ], [ [[INDVARS_IV_NEXT30:%.*]], [[FOR_BODY3_US:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV29]] to i32 @@ -181,7 +181,7 @@ ; CHECK-NEXT: [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32 ; CHECK-NEXT: [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop !1 +; CHECK-NEXT: br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: for.body3.lr.ph.us: ; CHECK-NEXT: [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ] ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV33]] to i32 diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_3_PREHEADER:%.*]], label [[INNER_LATCH:%.*]] ; CHECK: loop.3.preheader: -; CHECK-NEXT: [[L_1_LCSSA10:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] +; CHECK-NEXT: [[L_1_LCSSA11:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] ; CHECK-NEXT: [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ] ; CHECK-NEXT: [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[INNER_BB]] ] ; CHECK-NEXT: [[L_2_LCSSA3:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8* @@ -34,12 +34,12 @@ ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_2_LCSSA]], i64 1 ; CHECK-NEXT: [[SCEVGEP4:%.*]] = bitcast i16* [[SCEVGEP]] to i8* ; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1 -; CHECK-NEXT: [[SCEVGEP58:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* +; CHECK-NEXT: [[SCEVGEP59:%.*]] = bitcast i16* 
[[SCEVGEP5]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], 2 -; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i16, i16* [[L_1_LCSSA10]], i64 [[TMP1]] -; CHECK-NEXT: [[SCEVGEP912:%.*]] = bitcast i16* [[SCEVGEP9]] to i8* -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP912]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP58]], [[SCEVGEP4]] +; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr i16, i16* [[L_1_LCSSA11]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP1013:%.*]] = bitcast i16* [[SCEVGEP10]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP1013]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP59]], [[SCEVGEP4]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true ; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] @@ -84,12 +84,13 @@ ; CHECK-NEXT: store i16 [[LOOP_L_1]], i16* [[GEP_2]], align 2 ; CHECK-NEXT: br i1 [[C_5]], label [[LOOP_3]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit.loopexit: +; CHECK-NEXT: [[L_18:%.*]] = phi i16* [ [[L_1_LCSSA]], [[MIDDLE_BLOCK]] ], [ [[L_1_LCSSA]], [[LOOP_3]] ] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit.loopexit1: ; CHECK-NEXT: [[L_1_LCSSA6:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[L_17:%.*]] = phi i16* [ [[L_1_LCSSA6]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[L_17:%.*]] = phi i16* [ [[L_1_LCSSA6]], [[EXIT_LOOPEXIT1]] ], [ [[L_18]], [[EXIT_LOOPEXIT]] ] ; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[L_17]], align 2 ; CHECK-NEXT: ret i16 [[L_3]] ;