diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -186,14 +186,11 @@ /// previous iteration (e.g. if the value is defined in the previous /// iteration, we refer to it as first-order recurrence, if it is defined in /// the iteration before the previous, we refer to it as second-order - /// recurrence and so on). \p SinkAfter includes pairs of instructions where - /// the first will be rescheduled to appear after the second if/when the loop - /// is vectorized. It may be augmented with additional pairs if needed in - /// order to handle Phi as a first-order recurrence. - static bool - isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, - MapVector &SinkAfter, - DominatorTree *DT); + /// recurrence and so on). Note that this function optimistically assumes that + /// uses of the recurrence can be re-ordered if necessary and users need to + /// check and perform the re-ordering. + static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DominatorTree *DT); RecurKind getRecurrenceKind() const { return Kind; } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -516,10 +516,6 @@ /// Holds the phi nodes that are fixed-order recurrences. RecurrenceSet FixedOrderRecurrences; - /// Holds instructions that need to sink past other instructions to handle - /// fixed-order recurrences. - MapVector SinkAfter; - /// Holds the widest induction type encountered. Type *WidestIndTy = nullptr; diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -927,9 +927,8 @@ return false; } -bool RecurrenceDescriptor::isFixedOrderRecurrence( - PHINode *Phi, Loop *TheLoop, - MapVector &SinkAfter, DominatorTree *DT) { +bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DominatorTree *DT) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || @@ -965,8 +964,7 @@ Previous = dyn_cast(PrevPhi->getIncomingValueForBlock(Latch)); } - if (!Previous || !TheLoop->contains(Previous) || isa(Previous) || - SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. + if (!Previous || !TheLoop->contains(Previous) || isa(Previous)) return false; // Ensure every user of the phi node (recursively) is dominated by the @@ -975,23 +973,9 @@ // loop. // TODO: Consider extending this sinking to handle memory instructions. - // We optimistically assume we can sink all users after Previous. Keep a set - // of instructions to sink after Previous ordered by dominance in the common - // basic block. It will be applied to SinkAfter if all users can be sunk. - auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) { - return A->comesBefore(B); - }; - std::set InstrsToSink( - CompareByComesBefore); - BasicBlock *PhiBB = Phi->getParent(); SmallVector WorkList; auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) { - // Already sunk SinkCandidate. - if (SinkCandidate->getParent() == PhiBB && - InstrsToSink.find(SinkCandidate) != InstrsToSink.end()) - return true; - // Cyclic dependence. if (Previous == SinkCandidate) return false; @@ -1005,55 +989,12 @@ SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator()) return false; - // Avoid sinking an instruction multiple times (if multiple operands are - // fixed order recurrences) by sinking once - after the latest 'previous' - // instruction. - auto It = SinkAfter.find(SinkCandidate); - if (It != SinkAfter.end()) { - auto *OtherPrev = It->second; - // Find the earliest entry in the 'sink-after' chain. The last entry in - // the chain is the original 'Previous' for a recurrence handled earlier. - auto EarlierIt = SinkAfter.find(OtherPrev); - while (EarlierIt != SinkAfter.end()) { - Instruction *EarlierInst = EarlierIt->second; - EarlierIt = SinkAfter.find(EarlierInst); - // Bail out if order has not been preserved. - if (EarlierIt != SinkAfter.end() && - !DT->dominates(EarlierInst, OtherPrev)) - return false; - OtherPrev = EarlierInst; - } - // Bail out if order has not been preserved. - if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev)) - return false; - - // SinkCandidate is already being sunk after an instruction after - // Previous. Nothing left to do. - if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev) - return true; - - // If there are other instructions to be sunk after SinkCandidate, remove - // and re-insert SinkCandidate can break those instructions. Bail out for - // simplicity. - if (any_of(SinkAfter, - [SinkCandidate](const std::pair &P) { - return P.second == SinkCandidate; - })) - return false; - - // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be - // re-sunk to Previous, instead of sinking to OtherPrev. Remove - // SinkCandidate from SinkAfter to ensure it's insert position is updated. - SinkAfter.erase(SinkCandidate); - } - // If we reach a PHI node that is not dominated by Previous, we reached a // header PHI. No need for sinking. if (isa(SinkCandidate)) return true; // Sink User tentatively and check its users - InstrsToSink.insert(SinkCandidate); WorkList.push_back(SinkCandidate); return true; }; @@ -1068,11 +1009,6 @@ } } - // We can sink all users of Phi. Update the mapping. - for (Instruction *I : InstrsToSink) { - SinkAfter[I] = Previous; - Previous = I; - } return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -743,8 +743,7 @@ continue; } - if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, - SinkAfter, DT)) { + if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) { AllowedExit.insert(Phi); FixedOrderRecurrences.insert(Phi); continue; @@ -917,18 +916,6 @@ } } - // For fixed order recurrences, we use the previous value (incoming value from - // the latch) to check if it dominates all users of the recurrence. Bail out - // if we have to sink such an instruction for another recurrence, as the - // dominance requirement may not hold after sinking. - BasicBlock *LoopLatch = TheLoop->getLoopLatch(); - if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { - Instruction *V = - cast(Phi->getIncomingValueForBlock(LoopLatch)); - return SinkAfter.contains(V); - })) - return false; - // Now we know the widest induction type, check if our found induction // is the same size. If it's not, unset it here and InnerLoopVectorizer // will create another. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9067,7 +9067,8 @@ // Sink users of fixed-order recurrence past the recipe defining the previous // value and introduce FirstOrderRecurrenceSplice VPInstructions. - VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder); + if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder)) + return std::nullopt; // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -77,7 +77,10 @@ /// to combine the value from the recurrence phis and previous values. The /// current implementation assumes all users can be sunk after the previous /// value, which is enforced by earlier legality checks. - static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); + /// \returns true if all users of fixed-order recurrences could be re-arranged + /// as needed or false if it is not possible. In the latter case, \p Plan is + /// not valid. + static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the /// resulting plan to \p BestVF and \p BestUF. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -652,9 +652,10 @@ return VPDT.properlyDominates(ParentA, ParentB); } -// Sink users of \p FOR after the recipe defining the previous value \p Previous -// of the recurrence. -static void +/// Sink users of \p FOR after the recipe defining the previous value \p +/// Previous of the recurrence. \returns true if all users of \p FOR could be +/// re-arranged as needed or false if it is not possible. +static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT) { @@ -663,15 +664,18 @@ SmallPtrSet Seen; Seen.insert(Previous); auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) { - assert( - SinkCandidate != Previous && - "The previous value cannot depend on the users of the recurrence phi."); + // The previous value must not depend on the users of the recurrence phi. In + // that case, FOR is not a fixed order recurrence. + if (SinkCandidate == Previous) + return false; + if (isa(SinkCandidate) || !Seen.insert(SinkCandidate).second || properlyDominates(Previous, SinkCandidate, VPDT)) - return; + return true; WorkList.push_back(SinkCandidate); + return true; }; // Recursively sink users of FOR after Previous. @@ -682,7 +686,8 @@ "only recipes with a single defined value expected"); for (VPUser *User : Current->getVPSingleValue()->users()) { if (auto *R = dyn_cast(User)) - TryToPushSinkCandidate(R); + if (!TryToPushSinkCandidate(R)) + return false; } } @@ -699,9 +704,10 @@ SinkCandidate->moveAfter(Previous); Previous = SinkCandidate; } + return true; } -void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, +bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder) { VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -724,7 +730,8 @@ Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe(); } - sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT); + if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT)) + return false; // Introduce a recipe to combine the incoming and previous values of a // fixed-order recurrence. @@ -743,4 +750,5 @@ // all users. RecurSplice->setOperand(0, FOR); } + return true; } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -647,12 +647,40 @@ ret ptr %for.1 } -; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in -; loop vectorizer. -define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) { -; CHECK-LABEL: @test_crash -; CHECK-NOT: vector.body: -; CHECK: ret +; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and +; it needs to be sunk past the previous value for both recurrences. +define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { +; CHECK-LABEL: @test_resinking_required( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ , %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ , %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ , %vector.ph ], [ [[TMP4:%.*]], %vector.body ] +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3 +; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 +; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph +; Entry: br label %Loop @@ -661,7 +689,7 @@ %for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ] %for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ] %iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ] - %USE_2_INDVARS = fdiv double %for.3, %for.1 + %USE_2_FORS = fdiv double %for.3, %for.1 %div = fdiv double 0.000000e+00, %for.1 %l1 = load double, ptr %a, align 8 %iv.next= add nuw nsw i64 %iv, 1 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -221,22 +221,55 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: @test_pr54233_for_depend_on_each_other( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], +; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]] -; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]] -; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4 -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]] +; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ;