diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8090,18 +8090,12 @@ break; } case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_ULT: { // while (X < Y) - ExitLimit EL = - howManyLessThans(LHS, RHS, L, Pred, ControlsExit, AllowPredicates); - if (EL.hasAnyInfo()) return EL; - break; - } + case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_UGT: { // while (X > Y) - bool IsSigned = Pred == ICmpInst::ICMP_SGT; + case ICmpInst::ICMP_UGT: { + // while (X > Y) or while (X < Y) ExitLimit EL = - howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, - AllowPredicates); + howManyLessThans(LHS, RHS, L, Pred, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; break; } @@ -11648,7 +11642,7 @@ bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; - assert(ICmpInst::isLT(Pred) && "Unexpected pred"); + assert((ICmpInst::isGT(Pred) || ICmpInst::isLT(Pred)) && "Unexpected pred"); bool IsSigned = ICmpInst::isSigned(Pred); const SCEVAddRecExpr *IV = dyn_cast(LHS); @@ -11666,6 +11660,27 @@ if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); + // Force IV/RHS to integer types, so we can do arithmetic. + if (IV->getType()->isPointerTy()) { + const SCEV *IVCast = getLosslessPtrToIntExpr(IV); + if (isa(IVCast)) + return IVCast; + IV = cast(IVCast); + } + if (RHS->getType()->isPointerTy()) { + RHS = getLosslessPtrToIntExpr(RHS); + if (isa(RHS)) + return RHS; + } + + // If this is a greater-than comparison, invert the LHS/RHS to make it a + // less-than comparison. + if (ICmpInst::isGT(Pred)) { + IV = cast(getNotSCEV(IV)); + RHS = getNotSCEV(RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW; bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType); @@ -11765,21 +11780,6 @@ const SCEV *Start = IV->getStart(); - // Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond. - // Use integer-typed versions for actual computation. - const SCEV *OrigStart = Start; - const SCEV *OrigRHS = RHS; - if (Start->getType()->isPointerTy()) { - Start = getLosslessPtrToIntExpr(Start); - if (isa(Start)) - return Start; - } - if (RHS->getType()->isPointerTy()) { - RHS = getLosslessPtrToIntExpr(RHS); - if (isa(RHS)) - return RHS; - } - // When the RHS is not invariant, we do not know the end bound of the loop and // cannot calculate the ExactBECount needed by ExitLimit. However, we can // calculate the MaxBECount, given the start, stride and max value for the end @@ -11805,15 +11805,14 @@ // result is as above, and if not max(End,Start) is Start so we get a backedge // count of zero. const SCEV *BECount; - if (isLoopEntryGuardedByCond(L, Pred, getMinusSCEV(OrigStart, Stride), - OrigRHS)) + if (isLoopEntryGuardedByCond(L, Pred, getMinusSCEV(Start, Stride), RHS)) BECount = BECountIfBackedgeTaken; else { const SCEV *End; // If we know that RHS >= Start in the context of loop, then we know that // max(RHS, Start) = RHS at this point. - if (isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred), - OrigRHS, OrigStart)) + if (isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred), RHS, + Start)) End = RHS; else End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); diff --git a/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll b/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll --- a/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll +++ b/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll @@ -28,7 +28,7 @@ ; CHECK: Determining loop execution counts for: @test_other_exit ; CHECK: Loop %for.body: Unpredictable backedge-taken count. ; CHECK: Determining loop execution counts for: @test_gt -; CHECK: Loop %for.body: Unpredictable backedge-taken count. +; CHECK: Loop %for.body: backedge-taken count is ((((1 + (-1 * %S)) umax (-1 + (-1 * %N))) + %S) /u 2) define void @test(i32 %N) mustprogress { entry: diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll --- a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -437,7 +437,7 @@ define void @changing_end_bound7(i32 %start, i32* %n_addr, i32* %addr) { ; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound7 ; CHECK: Loop %loop: Unpredictable backedge-taken count. -; CHECK: Loop %loop: Unpredictable max backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is -1 entry: br label %loop diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll --- a/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -81,7 +81,7 @@ ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) ; CHECK: Loop %bb3: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32)) smin %M))) +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: diff --git a/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll b/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll --- a/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll +++ b/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll @@ -7,9 +7,13 @@ ; Test case from PR1614. define i32 @test_PR1614(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'test_PR1614' -; CHECK: --> (%a smax %b) -; CHECK: --> (%a smax %b smax %c) -; CHECK-NOT: smax +; CHECK-NEXT: Classifying expressions for: @test_PR1614 +; CHECK-NEXT: %B = select i1 %A, i32 %a, i32 %b +; CHECK-NEXT: --> (%a smax %b) U: full-set S: full-set +; CHECK-NEXT: %D = select i1 %C, i32 %B, i32 %c +; CHECK-NEXT: --> (%a smax %b smax %c) U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @test_PR1614 +; %A = icmp sgt i32 %a, %b %B = select i1 %A, i32 %a, i32 %b @@ -59,17 +63,16 @@ define void @smin_to_smax(i32 %n) { ; FIXME: ((-1 * (0 smin %n)) + %n) is actually just (0 smax %n) - ; CHECK-LABEL: 'smin_to_smax' ; CHECK-NEXT: Classifying expressions for: @smin_to_smax ; CHECK-NEXT: %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] -; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: (0 smin %n) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: (-1 + (-1 * (-1 smax (-1 + (-1 * %n))))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %dec = add nsw i32 %i.011, -1 -; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: (-1 + (0 smin %n)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: (-2 + (-1 * (-1 smax (-1 + (-1 * %n))))) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @smin_to_smax -; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Loop %for.body: backedge-taken count is (1 + (-1 smax (-1 + (-1 * %n))) + %n) ; CHECK-NEXT: Loop %for.body: max backedge-taken count is 2147483647 -; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (1 + (-1 smax (-1 + (-1 * %n))) + %n) ; CHECK-NEXT: Predicates: ; CHECK: Loop %for.body: Trip multiple is 1 ; @@ -95,18 +98,19 @@ ; The information from the loop guard can be used to simplify the trip count expression. define void @smax_simplify_with_guard(i32 %start, i32 %n) { -; CHECK-LABEL: 'smax_simplify_with_guard' +; CHECK-LABEL: 'smax_simplify_with_guard' ; CHECK-NEXT: Classifying expressions for: @smax_simplify_with_guard ; CHECK-NEXT: %k.0.i26 = phi i32 [ %start, %loop.ph ], [ %inc.i, %loop ] -; CHECK-NEXT: --> {%start,+,1}<%loop> U: full-set S: full-set Exits: %n LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,1}<%loop> U: full-set S: full-set Exits: %n LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %inc.i = add nsw i32 %k.0.i26, 1 -; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: (1 + %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: (1 + %n) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @smax_simplify_with_guard ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + %n) ; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-1 * %start) + %n) +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-1 * %start) + %n) ; CHECK-NEXT: Predicates: ; CHECK: Loop %loop: Trip multiple is 1 +; entry: %guard = icmp sge i32 %n, %start br i1 %guard, label %loop.ph, label %exit diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count13.ll b/llvm/test/Analysis/ScalarEvolution/trip-count13.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count13.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count13.ll @@ -93,8 +93,8 @@ br i1 %iv.cmp, label %loop, label %leave ; CHECK-LABEL: Determining loop execution counts for: @s_2 -; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * ((-100 + %start) smin %start)) + %start) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 +; CHECK-NEXT: Loop %loop: backedge-taken count is (1 + ((99 + (-1 * %start)) smax (-1 + (-1 * %start))) + %start) +; CHECK-NEXT: Loop %loop: max backedge-taken count is 100, actual taken count either this or zero. leave: ret void diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count3.ll b/llvm/test/Analysis/ScalarEvolution/trip-count3.ll --- a/llvm/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count3.ll @@ -40,9 +40,9 @@ define void @sha_stream_bb3_2E_i(%struct.SHA_INFO* %sha_info, i8* %data1, i32, i8** %buffer_addr.0.i.out, i32* %count_addr.0.i.out) nounwind { ; CHECK-LABEL: 'sha_stream_bb3_2E_i' ; CHECK-NEXT: Determining loop execution counts for: @sha_stream_bb3_2E_i -; CHECK-NEXT: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) +; CHECK-NEXT: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) ; CHECK-NEXT: Loop %bb3.i: max backedge-taken count is 33554431 -; CHECK-NEXT: Loop %bb3.i: Predicated backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) +; CHECK-NEXT: Loop %bb3.i: Predicated backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) ; CHECK-NEXT: Predicates: ; CHECK: Loop %bb3.i: Trip multiple is 1 ; diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -5,11 +5,11 @@ define void @foo(%struct.SpeexPreprocessState_* nocapture readonly %st, i16* %x) { ; CHECK-LABEL: foo: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: ldrd r12, r2, [r0] +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: ldrd r12, lr, [r0] ; CHECK-NEXT: ldrd r4, r3, [r0, #8] -; CHECK-NEXT: rsb r12, r12, r2, lsl #1 +; CHECK-NEXT: rsb r12, r12, lr, lsl #1 ; CHECK-NEXT: dlstp.16 lr, r12 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -30,7 +30,7 @@ ; CHECK-NEXT: vstrh.16 q1, [r0], #16 ; CHECK-NEXT: letp lr, .LBB0_3 ; CHECK-NEXT: @ %bb.4: @ %do.end13 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %ps_size = getelementptr inbounds %struct.SpeexPreprocessState_, %struct.SpeexPreprocessState_* %st, i32 0, i32 1 %0 = load i32, i32* %ps_size, align 4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll @@ -10,17 +10,17 @@ define arm_aapcs_vfpcc float @vctpi32(float* %0, i32 %1) { ; CHECK-LABEL: vctpi32: ; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmvn.i32 q1, #0x1f ; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: vadd.i32 q1, q3, q1 -; CHECK-NEXT: subs r3, r1, #1 -; CHECK-NEXT: vidup.u32 q2, r2, #8 +; CHECK-NEXT: vidup.u32 q2, r4, #8 ; CHECK-NEXT: vmov r0, s4 ; CHECK-NEXT: vadd.i32 q1, q2, r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: dlstp.32 lr, r3 +; CHECK-NEXT: subs r0, r1, #1 +; CHECK-NEXT: dlstp.32 lr, r0 ; CHECK-NEXT: .LBB0_1: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q2, [q1, #32]! ; CHECK-NEXT: vadd.f32 q0, q0, q2 @@ -30,7 +30,7 @@ ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vabs.f32 s0, s0 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} %3 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 8) %4 = extractvalue { <4 x i32>, i32 } %3, 0 %5 = add nsw i32 %1, -1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -4,14 +4,14 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { ; CHECK-LABEL: arm_var_f32_mve: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [r12], #16 +; CHECK-NEXT: vldrw.u32 q1, [r4], #16 ; CHECK-NEXT: vadd.f32 q0, q0, q1 ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit @@ -19,13 +19,13 @@ ; CHECK-NEXT: vadd.f32 s0, s3, s3 ; CHECK-NEXT: vcvt.f32.u32 s4, s4 ; CHECK-NEXT: vdiv.f32 s0, s0, s4 -; CHECK-NEXT: vmov r12, s0 +; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_3: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vsub.f32 q1, q1, r12 +; CHECK-NEXT: vsub.f32 q1, q1, r4 ; CHECK-NEXT: vfma.f32 q0, q1, q1 ; CHECK-NEXT: letp lr, .LBB0_3 ; CHECK-NEXT: @ %bb.4: @ %do.end @@ -35,7 +35,7 @@ ; CHECK-NEXT: vcvt.f32.u32 s2, s2 ; CHECK-NEXT: vdiv.f32 s0, s0, s2 ; CHECK-NEXT: vstr s0, [r2] -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: br label %do.body.i diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll @@ -6,8 +6,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs r2, r1, #4 -; CHECK-NEXT: movw r3, #0 +; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: movt r3, #65408 ; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: dlstp.32 lr, r1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -6,7 +6,7 @@ ; CHECK: @ %bb.0: @ %bb ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: ldrd r5, r12, [sp, #80] +; CHECK-NEXT: ldrd r4, r12, [sp, #80] ; CHECK-NEXT: vmvn.i32 q0, #0x80000000 ; CHECK-NEXT: vmov.i32 q1, #0x3f ; CHECK-NEXT: vmov.i32 q2, #0x1 @@ -20,7 +20,7 @@ ; CHECK-NEXT: vadd.i32 q3, q3, q2 ; CHECK-NEXT: vshr.u32 q6, q5, #24 ; CHECK-NEXT: vand q6, q6, q1 -; CHECK-NEXT: vldrw.u32 q7, [r5, q6, uxtw #2] +; CHECK-NEXT: vldrw.u32 q7, [r4, q6, uxtw #2] ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 ; CHECK-NEXT: vqsub.s32 q6, q0, q6 ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 @@ -92,26 +92,28 @@ define void @dont_remat_predicated_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5, i32 %conv.mask) { ; CHECK-LABEL: dont_remat_predicated_vctp: ; CHECK: @ %bb.0: @ %bb -; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldrd r6, r12, [sp, #88] -; CHECK-NEXT: movs r4, #4 -; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: ldrd r4, r12, [sp, #88] +; CHECK-NEXT: mvn lr, #4 +; CHECK-NEXT: mvn.w r5, r12 +; CHECK-NEXT: cmn.w r5, #5 +; CHECK-NEXT: csinv r5, lr, r12, le ; CHECK-NEXT: vmvn.i32 q0, #0x80000000 -; CHECK-NEXT: csel r5, r12, r4, lt +; CHECK-NEXT: add r5, r12 ; CHECK-NEXT: vmov.i32 q1, #0x3f -; CHECK-NEXT: sub.w r5, r12, r5 -; CHECK-NEXT: vmov.i32 q2, #0x1 -; CHECK-NEXT: add.w lr, r5, #3 +; CHECK-NEXT: add.w lr, r5, #4 ; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: vmov.i32 q2, #0x1 ; CHECK-NEXT: add.w lr, r5, lr, lsr #2 +; CHECK-NEXT: movs r5, #4 ; CHECK-NEXT: .LBB1_1: @ %bb6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpst -; CHECK-NEXT: vctpt.32 r4 +; CHECK-NEXT: vctpt.32 r5 ; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 @@ -121,7 +123,7 @@ ; CHECK-NEXT: vadd.i32 q3, q3, q2 ; CHECK-NEXT: vshr.u32 q6, q5, #24 ; CHECK-NEXT: vand q6, q6, q1 -; CHECK-NEXT: vldrw.u32 q7, [r6, q6, uxtw #2] +; CHECK-NEXT: vldrw.u32 q7, [r4, q6, uxtw #2] ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 ; CHECK-NEXT: vqsub.s32 q6, q0, q6 ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 @@ -143,7 +145,7 @@ ; CHECK-NEXT: @ %bb.2: @ %bb44 ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} bb: %i = zext i16 %arg5 to i32 br label %bb6 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -5,7 +5,6 @@ ; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: subs.w r3, r2, #8 ; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -50,12 +49,12 @@ ; CHECK-LABEL: bad: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: cmp r2, #4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, #4 -; CHECK-NEXT: subs r3, r2, r3 -; CHECK-NEXT: add.w r12, r3, #3 +; CHECK-NEXT: mvns r3, r2 +; CHECK-NEXT: mvn r12, #4 +; CHECK-NEXT: cmn.w r3, #5 +; CHECK-NEXT: csinv r3, r12, r2, le +; CHECK-NEXT: add r3, r2 +; CHECK-NEXT: add.w r12, r3, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w r3, r3, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -5,8 +5,8 @@ define void @simple(i32* nocapture readonly %x, i32* nocapture readnone %y, i32* nocapture %z, i32 %m, i32 %n) { ; CHECK-LABEL: simple: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: ldr r1, [sp, #8] ; CHECK-NEXT: mov r12, r3 ; CHECK-NEXT: adds r3, r1, #3 @@ -21,7 +21,7 @@ ; CHECK-NEXT: letp lr, .LBB0_2 ; CHECK-NEXT: .LBB0_3: @ %if.end ; CHECK-NEXT: str.w r12, [r2] -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %add = add i32 %n, 3 %div = lshr i32 %add, 2 @@ -54,11 +54,11 @@ define void @nested(i32* nocapture readonly %x, i32* nocapture readnone %y, i32* nocapture %z, i32 %m, i32 %n) { ; CHECK-LABEL: nested: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: cbz r3, .LBB1_8 ; CHECK-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-NEXT: ldr.w r12, [sp, #24] +; CHECK-NEXT: ldr.w r12, [sp, #28] ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: b .LBB1_4 ; CHECK-NEXT: .LBB1_2: @ in Loop: Header=BB1_4 Depth=1 @@ -72,14 +72,14 @@ ; CHECK-NEXT: .LBB1_4: @ %for.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB1_6 Depth 2 -; CHECK-NEXT: add.w r6, r12, #3 -; CHECK-NEXT: lsrs r7, r6, #2 +; CHECK-NEXT: add.w r7, r12, #3 +; CHECK-NEXT: lsrs r6, r7, #2 ; CHECK-NEXT: beq .LBB1_2 ; CHECK-NEXT: @ %bb.5: @ %do.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: bic r5, r6, #3 ; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: add.w r8, r0, r5, lsl #2 +; CHECK-NEXT: bic r6, r7, #3 +; CHECK-NEXT: add.w r9, r0, r6, lsl #2 ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB1_6: @ %do.body ; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1 @@ -89,11 +89,11 @@ ; CHECK-NEXT: letp lr, .LBB1_6 ; CHECK-NEXT: @ %bb.7: @ %if.end.loopexit ; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1 -; CHECK-NEXT: sub.w r12, r12, r5 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: sub.w r12, r12, r6 +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: b .LBB1_3 ; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %cmp20.not = icmp eq i32 %m, 0 br i1 %cmp20.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1049,10 +1049,10 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #28 +; CHECK-NEXT: sub sp, #28 ; CHECK-NEXT: cmp r3, #8 -; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: blo.w .LBB16_12 ; CHECK-NEXT: @ %bb.1: @ %entry ; CHECK-NEXT: lsrs.w r12, r3, #2 @@ -1060,136 +1060,144 @@ ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: ldrh r4, [r0] ; CHECK-NEXT: movs r1, #1 -; CHECK-NEXT: ldrd r5, r3, [r0, #4] +; CHECK-NEXT: ldrd r7, r3, [r0, #4] ; CHECK-NEXT: sub.w r0, r4, #8 -; CHECK-NEXT: add.w r7, r0, r0, lsr #29 -; CHECK-NEXT: and r0, r0, #7 -; CHECK-NEXT: asrs r6, r7, #3 -; CHECK-NEXT: cmp r6, #1 +; CHECK-NEXT: add.w r6, r0, r0, lsr #29 +; CHECK-NEXT: asrs r5, r6, #3 +; CHECK-NEXT: cmp r5, #1 ; CHECK-NEXT: it gt -; CHECK-NEXT: asrgt r1, r7, #3 -; CHECK-NEXT: add.w r7, r5, r4, lsl #1 +; CHECK-NEXT: asrgt r1, r6, #3 ; CHECK-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-NEXT: subs r1, r7, #2 -; CHECK-NEXT: rsbs r7, r4, #0 -; CHECK-NEXT: str r7, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: add.w r7, r3, #16 -; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_5 -; CHECK-NEXT: .LBB16_3: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: wls lr, r0, .LBB16_4 -; CHECK-NEXT: b .LBB16_9 -; CHECK-NEXT: .LBB16_4: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #7 +; CHECK-NEXT: mvn r6, #1 +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mvns r0, r1 +; CHECK-NEXT: cmn.w r0, #2 +; CHECK-NEXT: csinv r0, r6, r1, le +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r7, r4, lsl #1 +; CHECK-NEXT: subs r1, r0, #2 +; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r3, #16 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: b .LBB16_4 +; CHECK-NEXT: .LBB16_3: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: subs.w r12, r12, #1 ; CHECK-NEXT: vstrb.8 q0, [r2], #8 -; CHECK-NEXT: add.w r0, r5, r0, lsl #1 -; CHECK-NEXT: add.w r5, r0, #8 +; CHECK-NEXT: add.w r0, r7, r0, lsl #1 +; CHECK-NEXT: add.w r7, r0, #8 ; CHECK-NEXT: beq.w .LBB16_12 -; CHECK-NEXT: .LBB16_5: @ %while.body +; CHECK-NEXT: .LBB16_4: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_7 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: ldrh.w lr, [r3, #14] ; CHECK-NEXT: vldrw.u32 q0, [r0], #8 -; CHECK-NEXT: ldrh.w r8, [r3, #12] -; CHECK-NEXT: ldrh r7, [r3, #10] -; CHECK-NEXT: ldrh r4, [r3, #8] -; CHECK-NEXT: ldrh r6, [r3, #6] -; CHECK-NEXT: ldrh.w r9, [r3, #4] +; CHECK-NEXT: ldrh.w r9, [r3, #12] +; CHECK-NEXT: ldrh r4, [r3, #10] +; CHECK-NEXT: ldrh r6, [r3, #8] +; CHECK-NEXT: ldrh r5, [r3, #6] +; CHECK-NEXT: ldrh.w r10, [r3, #4] ; CHECK-NEXT: ldrh.w r11, [r3, #2] -; CHECK-NEXT: ldrh.w r10, [r3] +; CHECK-NEXT: ldrh.w r8, [r3] ; CHECK-NEXT: vstrb.8 q0, [r1], #8 -; CHECK-NEXT: vldrw.u32 q0, [r5] -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: adds r0, r5, #2 +; CHECK-NEXT: vldrw.u32 q0, [r7] +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: adds r0, r7, #2 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vmul.f16 q0, q0, r10 -; CHECK-NEXT: adds r0, r5, #6 +; CHECK-NEXT: vmul.f16 q0, q0, r8 +; CHECK-NEXT: adds r0, r7, #6 ; CHECK-NEXT: vfma.f16 q0, q1, r11 -; CHECK-NEXT: vldrw.u32 q1, [r5, #4] -; CHECK-NEXT: vfma.f16 q0, q1, r9 +; CHECK-NEXT: vldrw.u32 q1, [r7, #4] +; CHECK-NEXT: vfma.f16 q0, q1, r10 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: add.w r0, r5, #10 +; CHECK-NEXT: add.w r0, r7, #10 +; CHECK-NEXT: vfma.f16 q0, q1, r5 +; CHECK-NEXT: vldrw.u32 q1, [r7, #8] ; CHECK-NEXT: vfma.f16 q0, q1, r6 -; CHECK-NEXT: vldrw.u32 q1, [r5, #8] -; CHECK-NEXT: vfma.f16 q0, q1, r4 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: add.w r0, r5, #14 -; CHECK-NEXT: vfma.f16 q0, q1, r7 -; CHECK-NEXT: vldrw.u32 q1, [r5, #12] -; CHECK-NEXT: adds r5, #16 -; CHECK-NEXT: vfma.f16 q0, q1, r8 +; CHECK-NEXT: add.w r0, r7, #14 +; CHECK-NEXT: vfma.f16 q0, q1, r4 +; CHECK-NEXT: vldrw.u32 q1, [r7, #12] +; CHECK-NEXT: adds r7, #16 +; CHECK-NEXT: vfma.f16 q0, q1, r9 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vfma.f16 q0, q1, lr ; CHECK-NEXT: cmp r0, #16 -; CHECK-NEXT: blo .LBB16_8 -; CHECK-NEXT: @ %bb.6: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: blo .LBB16_7 +; CHECK-NEXT: @ %bb.5: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: .LBB16_7: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: .LBB16_6: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldrh r0, [r6], #16 -; CHECK-NEXT: vldrw.u32 q1, [r5] -; CHECK-NEXT: adds r4, r5, #2 +; CHECK-NEXT: ldrh r0, [r5], #16 +; CHECK-NEXT: vldrw.u32 q1, [r7] +; CHECK-NEXT: adds r4, r7, #2 ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: ldrh r0, [r6, #-14] -; CHECK-NEXT: adds r4, r5, #6 +; CHECK-NEXT: ldrh r0, [r5, #-14] +; CHECK-NEXT: adds r4, r7, #6 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: ldrh r0, [r6, #-12] -; CHECK-NEXT: vldrw.u32 q1, [r5, #4] +; CHECK-NEXT: ldrh r0, [r5, #-12] +; CHECK-NEXT: vldrw.u32 q1, [r7, #4] ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: ldrh r0, [r6, #-10] -; CHECK-NEXT: add.w r4, r5, #10 +; CHECK-NEXT: ldrh r0, [r5, #-10] +; CHECK-NEXT: add.w r4, r7, #10 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: ldrh r0, [r6, #-8] -; CHECK-NEXT: vldrw.u32 q1, [r5, #8] +; CHECK-NEXT: ldrh r0, [r5, #-8] +; CHECK-NEXT: vldrw.u32 q1, [r7, #8] ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vldrw.u32 q1, [r4] -; CHECK-NEXT: ldrh r0, [r6, #-6] -; CHECK-NEXT: ldrh r4, [r6, #-2] +; CHECK-NEXT: ldrh r0, [r5, #-6] +; CHECK-NEXT: ldrh r4, [r5, #-2] ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: ldrh r0, [r6, #-4] -; CHECK-NEXT: vldrw.u32 q1, [r5, #12] +; CHECK-NEXT: ldrh r0, [r5, #-4] +; CHECK-NEXT: vldrw.u32 q1, [r7, #12] ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: add.w r0, r5, #14 +; CHECK-NEXT: add.w r0, r7, #14 ; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: vfma.f16 q0, q1, r4 -; CHECK-NEXT: le lr, .LBB16_7 -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: le lr, .LBB16_6 +; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: .LBB16_8: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: beq.w .LBB16_3 +; CHECK-NEXT: @ %bb.9: @ %while.body76.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldrh r4, [r6], #2 +; CHECK-NEXT: ldrh r4, [r5], #2 ; CHECK-NEXT: vldrh.u16 q1, [r0], #2 ; CHECK-NEXT: vfma.f16 q0, q1, r4 ; CHECK-NEXT: le lr, .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: add.w r5, r5, r0, lsl #1 -; CHECK-NEXT: b .LBB16_4 +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: add.w r7, r7, r0, lsl #1 +; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_12: @ %if.end -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: add sp, #28 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, %struct.arm_fir_instance_f32* %S, i32 0, i32 1 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1054,115 +1054,124 @@ ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: ldrh r6, [r0] ; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: ldrd r4, r10, [r0, #4] +; CHECK-NEXT: ldrd r8, r9, [r0, #4] ; CHECK-NEXT: sub.w r0, r6, #8 ; CHECK-NEXT: add.w r3, r0, r0, lsr #29 -; CHECK-NEXT: and r0, r0, #7 ; CHECK-NEXT: asrs r7, r3, #3 ; CHECK-NEXT: cmp r7, #1 +; CHECK-NEXT: and r7, r0, #7 ; CHECK-NEXT: it gt ; CHECK-NEXT: asrgt r5, r3, #3 -; CHECK-NEXT: add.w r3, r4, r6, lsl #2 -; CHECK-NEXT: sub.w r9, r3, #4 -; CHECK-NEXT: rsbs r3, r6, #0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: add.w r3, r10, #32 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mvn r3, #1 +; CHECK-NEXT: str r5, [sp] @ 4-byte Spill +; CHECK-NEXT: mvns r0, r7 +; CHECK-NEXT: cmn.w r0, #2 +; CHECK-NEXT: csinv r0, r3, r7, le +; CHECK-NEXT: str r7, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: add r0, r7 ; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: b .LBB16_5 -; CHECK-NEXT: .LBB16_3: @ %for.end -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload -; CHECK-NEXT: wls lr, r0, .LBB16_4 -; CHECK-NEXT: b .LBB16_9 -; CHECK-NEXT: .LBB16_4: @ %while.end -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r8, r6, lsl #2 +; CHECK-NEXT: sub.w r11, r0, #4 +; CHECK-NEXT: rsbs r0, r6, #0 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: add.w r0, r9, #32 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: b .LBB16_4 +; CHECK-NEXT: .LBB16_3: @ %while.end +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: subs.w r12, r12, #1 +; CHECK-NEXT: ldrd r11, r1, [sp, #24] @ 8-byte Folded Reload ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: add.w r0, r4, r0, lsl #2 -; CHECK-NEXT: add.w r4, r0, #16 +; CHECK-NEXT: add.w r0, r8, r0, lsl #2 +; CHECK-NEXT: add.w r8, r0, #16 ; CHECK-NEXT: beq .LBB16_12 -; CHECK-NEXT: .LBB16_5: @ %while.body +; CHECK-NEXT: .LBB16_4: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB16_7 Depth 2 +; CHECK-NEXT: @ Child Loop BB16_6 Depth 2 ; CHECK-NEXT: @ Child Loop BB16_10 Depth 2 -; CHECK-NEXT: add.w lr, r10, #8 +; CHECK-NEXT: add.w r7, r9, #8 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 -; CHECK-NEXT: ldrd r3, r7, [r10] -; CHECK-NEXT: ldm.w lr, {r0, r5, r6, lr} -; CHECK-NEXT: ldrd r11, r8, [r10, #24] -; CHECK-NEXT: vstrb.8 q0, [r9], #16 -; CHECK-NEXT: vldrw.u32 q0, [r4], #32 -; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill -; CHECK-NEXT: vldrw.u32 q1, [r4, #-28] +; CHECK-NEXT: ldrd r3, r6, [r9] +; CHECK-NEXT: ldm r7, {r0, r5, r7} +; CHECK-NEXT: ldrd r4, lr, [r9, #20] +; CHECK-NEXT: ldr.w r10, [r9, #28] +; CHECK-NEXT: vstrb.8 q0, [r11], #16 +; CHECK-NEXT: vldrw.u32 q0, [r8], #32 +; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill +; CHECK-NEXT: vldrw.u32 q1, [r8, #-28] ; CHECK-NEXT: vmul.f32 q0, q0, r3 -; CHECK-NEXT: vldrw.u32 q6, [r4, #-24] -; CHECK-NEXT: vldrw.u32 q4, [r4, #-20] -; CHECK-NEXT: vfma.f32 q0, q1, r7 -; CHECK-NEXT: vldrw.u32 q5, [r4, #-16] +; CHECK-NEXT: vldrw.u32 q6, [r8, #-24] +; CHECK-NEXT: vldrw.u32 q4, [r8, #-20] +; CHECK-NEXT: vfma.f32 q0, q1, r6 +; CHECK-NEXT: vldrw.u32 q5, [r8, #-16] ; CHECK-NEXT: vfma.f32 q0, q6, r0 -; CHECK-NEXT: vldrw.u32 q2, [r4, #-12] +; CHECK-NEXT: vldrw.u32 q2, [r8, #-12] ; CHECK-NEXT: vfma.f32 q0, q4, r5 -; CHECK-NEXT: vldrw.u32 q3, [r4, #-8] -; CHECK-NEXT: vfma.f32 q0, q5, r6 +; CHECK-NEXT: vldrw.u32 q3, [r8, #-8] +; CHECK-NEXT: vfma.f32 q0, q5, r7 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: vfma.f32 q0, q2, lr -; CHECK-NEXT: vldrw.u32 q1, [r4, #-4] -; CHECK-NEXT: vfma.f32 q0, q3, r11 +; CHECK-NEXT: vfma.f32 q0, q2, r4 +; CHECK-NEXT: vldrw.u32 q1, [r8, #-4] +; CHECK-NEXT: vfma.f32 q0, q3, lr ; CHECK-NEXT: cmp r0, #16 -; CHECK-NEXT: vfma.f32 q0, q1, r8 -; CHECK-NEXT: blo .LBB16_8 -; CHECK-NEXT: @ %bb.6: @ %for.body.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vfma.f32 q0, q1, r10 +; CHECK-NEXT: blo .LBB16_7 +; CHECK-NEXT: @ %bb.5: @ %for.body.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: dls lr, r0 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: .LBB16_7: @ %for.body -; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: .LBB16_6: @ %for.body +; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11} -; CHECK-NEXT: vldrw.u32 q1, [r4], #32 -; CHECK-NEXT: vldrw.u32 q6, [r4, #-24] -; CHECK-NEXT: vldrw.u32 q4, [r4, #-20] +; CHECK-NEXT: ldm.w r6, {r0, r3, r4, r5, r7, r10, r11} +; CHECK-NEXT: vldrw.u32 q1, [r8], #32 +; CHECK-NEXT: vldrw.u32 q6, [r8, #-24] +; CHECK-NEXT: vldrw.u32 q4, [r8, #-20] ; CHECK-NEXT: vfma.f32 q0, q1, r0 -; CHECK-NEXT: vldrw.u32 q1, [r4, #-28] -; CHECK-NEXT: vldrw.u32 q5, [r4, #-16] -; CHECK-NEXT: vldrw.u32 q2, [r4, #-12] +; CHECK-NEXT: vldrw.u32 q1, [r8, #-28] +; CHECK-NEXT: vldrw.u32 q5, [r8, #-16] +; CHECK-NEXT: vldrw.u32 q2, [r8, #-12] ; CHECK-NEXT: vfma.f32 q0, q1, r3 -; CHECK-NEXT: ldrd r9, r1, [r7, #24] -; CHECK-NEXT: vfma.f32 q0, q6, r5 -; CHECK-NEXT: vldrw.u32 q3, [r4, #-8] -; CHECK-NEXT: vfma.f32 q0, q4, r6 -; CHECK-NEXT: vldrw.u32 q1, [r4, #-4] -; CHECK-NEXT: vfma.f32 q0, q5, r8 -; CHECK-NEXT: adds r7, #32 -; CHECK-NEXT: vfma.f32 q0, q2, r11 -; CHECK-NEXT: vfma.f32 q0, q3, r9 +; CHECK-NEXT: vldrw.u32 q3, [r8, #-8] +; CHECK-NEXT: vfma.f32 q0, q6, r4 +; CHECK-NEXT: ldr r1, [r6, #28] +; CHECK-NEXT: vfma.f32 q0, q4, r5 +; CHECK-NEXT: vldrw.u32 q1, [r8, #-4] +; CHECK-NEXT: vfma.f32 q0, q5, r7 +; CHECK-NEXT: adds r6, #32 +; CHECK-NEXT: vfma.f32 q0, q2, r10 +; CHECK-NEXT: vfma.f32 q0, q3, r11 ; CHECK-NEXT: vfma.f32 q0, q1, r1 -; CHECK-NEXT: le lr, .LBB16_7 -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_8: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: b .LBB16_3 -; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: le lr, .LBB16_6 +; CHECK-NEXT: b .LBB16_8 +; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: .LBB16_8: @ %for.end +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: beq .LBB16_3 +; CHECK-NEXT: @ %bb.9: @ %while.body76.preheader +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: .LBB16_10: @ %while.body76 -; CHECK-NEXT: @ Parent Loop BB16_5 Depth=1 +; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldr r0, [r7], #4 +; CHECK-NEXT: ldr r0, [r6], #4 ; CHECK-NEXT: vldrw.u32 q1, [r3], #4 ; CHECK-NEXT: vfma.f32 q0, q1, r0 ; CHECK-NEXT: le lr, .LBB16_10 ; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit -; CHECK-NEXT: @ in Loop: Header=BB16_5 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: add.w r4, r4, r0, lsl #2 -; CHECK-NEXT: b .LBB16_4 +; CHECK-NEXT: add.w r8, r8, r0, lsl #2 +; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} diff --git a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll --- a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll @@ -147,10 +147,7 @@ ; CHECK-LABEL: do_dec2 ; CHECK: entry: -; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, 1 -; CHECK: [[SMIN:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 2) -; CHECK: [[SUB:%[^ ]+]] = sub i32 [[ROUND]], [[SMIN]] -; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[SUB]], 1 +; CHECK: [[HALVE:%[^ ]+]] = lshr i32 {{.*}}, 1 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1 ; CHECK: while.body.lr.ph: diff --git a/llvm/test/Transforms/HardwareLoops/scalar-while.ll b/llvm/test/Transforms/HardwareLoops/scalar-while.ll --- a/llvm/test/Transforms/HardwareLoops/scalar-while.ll +++ b/llvm/test/Transforms/HardwareLoops/scalar-while.ll @@ -122,16 +122,20 @@ ; CHECK-DEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[I:%.*]], [[N:%.*]] ; CHECK-DEC-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-DEC: while.body.preheader: -; CHECK-DEC-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) +; CHECK-DEC-NEXT: [[TMP0:%.*]] = sub i32 0, [[I]] +; CHECK-DEC-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-DEC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-DEC-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-DEC-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-DEC-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-DEC-NEXT: br i1 [[TMP1]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-DEC-NEXT: [[TMP4:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-DEC-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void ; @@ -140,18 +144,22 @@ ; CHECK-PHI-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[I:%.*]], [[N:%.*]] ; CHECK-PHI-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-PHI: while.body.preheader: -; CHECK-PHI-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] -; CHECK-PHI-NEXT: [[TMP1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP0]]) +; CHECK-PHI-NEXT: [[TMP0:%.*]] = sub i32 0, [[I]] +; CHECK-PHI-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-PHI-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-PHI-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-PHI-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-PHI-NEXT: [[TMP4:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-PHI-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP3:%.*]], [[WHILE_BODY]] ] +; CHECK-PHI-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP6:%.*]], [[WHILE_BODY]] ] ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-PHI-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP2]], i32 1) -; CHECK-PHI-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-PHI-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-PHI-NEXT: [[TMP6]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP5]], i32 1) +; CHECK-PHI-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-PHI-NEXT: br i1 [[TMP7]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: ; CHECK-PHI-NEXT: ret void ; @@ -160,54 +168,66 @@ ; CHECK-NESTED-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[I:%.*]], [[N:%.*]] ; CHECK-NESTED-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-NESTED: while.body.preheader: -; CHECK-NESTED-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] -; CHECK-NESTED-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) +; CHECK-NESTED-NEXT: [[TMP0:%.*]] = sub i32 0, [[I]] +; CHECK-NESTED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-NESTED-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NESTED-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-NESTED-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NESTED-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-NESTED-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-NESTED: while.body: ; CHECK-NESTED-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-NESTED-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-NESTED-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-NESTED-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-NESTED-NEXT: br i1 [[TMP1]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-NESTED-NEXT: [[TMP4:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-NESTED-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-NESTED: while.end: ; CHECK-NESTED-NEXT: ret void ; ; CHECK-GUARD-LABEL: @while_gt( ; CHECK-GUARD-NEXT: entry: ; CHECK-GUARD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[I:%.*]], [[N:%.*]] -; CHECK-GUARD-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] ; CHECK-GUARD-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-GUARD: while.body.preheader: -; CHECK-GUARD-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) +; CHECK-GUARD-NEXT: [[TMP0:%.*]] = sub i32 0, [[I]] +; CHECK-GUARD-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-GUARD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-GUARD-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-GUARD-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-GUARD-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-GUARD-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-GUARD: while.body: ; CHECK-GUARD-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-GUARD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-GUARD-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-GUARD-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-GUARD-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-GUARD-NEXT: br i1 [[TMP1]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-GUARD-NEXT: [[TMP4:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-GUARD-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-GUARD: while.end: ; CHECK-GUARD-NEXT: ret void ; ; CHECK-PHIGUARD-LABEL: @while_gt( ; CHECK-PHIGUARD-NEXT: entry: ; CHECK-PHIGUARD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[I:%.*]], [[N:%.*]] -; CHECK-PHIGUARD-NEXT: [[TMP0:%.*]] = sub i32 [[I]], [[N]] ; CHECK-PHIGUARD-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] ; CHECK-PHIGUARD: while.body.preheader: -; CHECK-PHIGUARD-NEXT: [[TMP1:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP0]]) +; CHECK-PHIGUARD-NEXT: [[TMP0:%.*]] = sub i32 0, [[I]] +; CHECK-PHIGUARD-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-PHIGUARD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-PHIGUARD-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-PHIGUARD-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +; CHECK-PHIGUARD-NEXT: [[TMP4:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-PHIGUARD-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHIGUARD: while.body: ; CHECK-PHIGUARD-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-PHIGUARD-NEXT: [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP3:%.*]], [[WHILE_BODY]] ] +; CHECK-PHIGUARD-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP6:%.*]], [[WHILE_BODY]] ] ; CHECK-PHIGUARD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHIGUARD-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-PHIGUARD-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-PHIGUARD-NEXT: [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP2]], i32 1) -; CHECK-PHIGUARD-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-PHIGUARD-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-PHIGUARD-NEXT: [[TMP6]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP5]], i32 1) +; CHECK-PHIGUARD-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-PHIGUARD-NEXT: br i1 [[TMP7]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHIGUARD: while.end: ; CHECK-PHIGUARD-NEXT: ret void ; @@ -233,17 +253,20 @@ ; CHECK-DEC-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I:%.*]], [[N:%.*]] ; CHECK-DEC-NEXT: br i1 [[CMP4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-DEC: while.body.preheader: -; CHECK-DEC-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 -; CHECK-DEC-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP1]]) +; CHECK-DEC-NEXT: [[TMP0:%.*]] = sub i32 -1, [[I]] +; CHECK-DEC-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-DEC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-DEC-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-DEC-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 2 +; CHECK-DEC-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-DEC-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-DEC: while.body: ; CHECK-DEC-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-DEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-DEC-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-DEC-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-DEC-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-DEC-NEXT: br i1 [[TMP2]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-DEC-NEXT: [[TMP4:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-DEC-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-DEC: while.end: ; CHECK-DEC-NEXT: ret void ; @@ -252,19 +275,22 @@ ; CHECK-PHI-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I:%.*]], [[N:%.*]] ; CHECK-PHI-NEXT: br i1 [[CMP4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-PHI: while.body.preheader: -; CHECK-PHI-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 -; CHECK-PHI-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-PHI-NEXT: [[TMP2:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP1]]) +; CHECK-PHI-NEXT: [[TMP0:%.*]] = sub i32 -1, [[I]] +; CHECK-PHI-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-PHI-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-PHI-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-PHI-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 2 +; CHECK-PHI-NEXT: [[TMP4:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-PHI-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHI: while.body: ; CHECK-PHI-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-PHI-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP4:%.*]], [[WHILE_BODY]] ] +; CHECK-PHI-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP6:%.*]], [[WHILE_BODY]] ] ; CHECK-PHI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHI-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-PHI-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-PHI-NEXT: [[TMP4]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) -; CHECK-PHI-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -; CHECK-PHI-NEXT: br i1 [[TMP5]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-PHI-NEXT: [[TMP6]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP5]], i32 1) +; CHECK-PHI-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-PHI-NEXT: br i1 [[TMP7]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHI: while.end: ; CHECK-PHI-NEXT: ret void ; @@ -273,17 +299,20 @@ ; CHECK-NESTED-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I:%.*]], [[N:%.*]] ; CHECK-NESTED-NEXT: br i1 [[CMP4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-NESTED: while.body.preheader: -; CHECK-NESTED-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 -; CHECK-NESTED-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-NESTED-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP1]]) +; CHECK-NESTED-NEXT: [[TMP0:%.*]] = sub i32 -1, [[I]] +; CHECK-NESTED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-NESTED-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NESTED-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-NESTED-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 2 +; CHECK-NESTED-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-NESTED-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-NESTED: while.body: ; CHECK-NESTED-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NESTED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-NESTED-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-NESTED-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-NESTED-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-NESTED-NEXT: br i1 [[TMP2]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-NESTED-NEXT: [[TMP4:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-NESTED-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-NESTED: while.end: ; CHECK-NESTED-NEXT: ret void ; @@ -292,17 +321,20 @@ ; CHECK-GUARD-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I:%.*]], [[N:%.*]] ; CHECK-GUARD-NEXT: br i1 [[CMP4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-GUARD: while.body.preheader: -; CHECK-GUARD-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 -; CHECK-GUARD-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-GUARD-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP1]]) +; CHECK-GUARD-NEXT: [[TMP0:%.*]] = sub i32 -1, [[I]] +; CHECK-GUARD-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-GUARD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-GUARD-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-GUARD-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 2 +; CHECK-GUARD-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-GUARD-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-GUARD: while.body: ; CHECK-GUARD-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-GUARD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-GUARD-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-GUARD-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-GUARD-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) -; CHECK-GUARD-NEXT: br i1 [[TMP2]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-GUARD-NEXT: [[TMP4:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-GUARD-NEXT: br i1 [[TMP4]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-GUARD: while.end: ; CHECK-GUARD-NEXT: ret void ; @@ -311,19 +343,22 @@ ; CHECK-PHIGUARD-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I:%.*]], [[N:%.*]] ; CHECK-PHIGUARD-NEXT: br i1 [[CMP4]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK-PHIGUARD: while.body.preheader: -; CHECK-PHIGUARD-NEXT: [[TMP0:%.*]] = add i32 [[I]], 1 -; CHECK-PHIGUARD-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[N]] -; CHECK-PHIGUARD-NEXT: [[TMP2:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP1]]) +; CHECK-PHIGUARD-NEXT: [[TMP0:%.*]] = sub i32 -1, [[I]] +; CHECK-PHIGUARD-NEXT: [[TMP1:%.*]] = sub i32 -1, [[N]] +; CHECK-PHIGUARD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-PHIGUARD-NEXT: [[TMP2:%.*]] = add i32 [[I]], [[SMAX]] +; CHECK-PHIGUARD-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 2 +; CHECK-PHIGUARD-NEXT: [[TMP4:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 [[TMP3]]) ; CHECK-PHIGUARD-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK-PHIGUARD: while.body: ; CHECK-PHIGUARD-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] -; CHECK-PHIGUARD-NEXT: [[TMP3:%.*]] = phi i32 [ [[TMP2]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP4:%.*]], [[WHILE_BODY]] ] +; CHECK-PHIGUARD-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[WHILE_BODY_PREHEADER]] ], [ [[TMP6:%.*]], [[WHILE_BODY]] ] ; CHECK-PHIGUARD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_ADDR_05]] ; CHECK-PHIGUARD-NEXT: store i32 [[I_ADDR_05]], i32* [[ARRAYIDX]], align 4 ; CHECK-PHIGUARD-NEXT: [[DEC]] = add nsw i32 [[I_ADDR_05]], -1 -; CHECK-PHIGUARD-NEXT: [[TMP4]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP3]], i32 1) -; CHECK-PHIGUARD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -; CHECK-PHIGUARD-NEXT: br i1 [[TMP5]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK-PHIGUARD-NEXT: [[TMP6]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP5]], i32 1) +; CHECK-PHIGUARD-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-PHIGUARD-NEXT: br i1 [[TMP7]], label [[WHILE_BODY]], label [[WHILE_END]] ; CHECK-PHIGUARD: while.end: ; CHECK-PHIGUARD-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll --- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -18,7 +18,8 @@ ; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: -; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[END1]] +; PTR64-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[END1]], i64 1) +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[UMAX]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] @@ -40,7 +41,8 @@ ; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: -; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[END1]] +; PTR32-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[END1]], i32 1) +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[UMAX]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] @@ -79,22 +81,18 @@ define i8 @testptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-LABEL: @testptrptr( -; PTR64-NEXT: [[BUF2:%.*]] = ptrtoint i8* [[BUF:%.*]] to i64 -; PTR64-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64 ; PTR64-NEXT: br label [[LOOPGUARD:%.*]] ; PTR64: loopguard: -; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF]], [[END]] +; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: -; PTR64-NEXT: [[TMP1:%.*]] = sub i64 [[END1]], [[BUF2]] -; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[BUF]], i64 [[TMP1]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] -; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; PTR64-NEXT: [[CMP:%.*]] = icmp ult i8* [[GEP]], [[END]] +; PTR64-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] ; PTR64-NEXT: br label [[EXIT]] @@ -103,22 +101,18 @@ ; PTR64-NEXT: ret i8 [[RET]] ; ; PTR32-LABEL: @testptrptr( -; PTR32-NEXT: [[BUF2:%.*]] = ptrtoint i8* [[BUF:%.*]] to i32 -; PTR32-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i32 ; PTR32-NEXT: br label [[LOOPGUARD:%.*]] ; PTR32: loopguard: -; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF]], [[END]] +; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: -; PTR32-NEXT: [[TMP1:%.*]] = sub i32 [[END1]], [[BUF2]] -; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[BUF]], i32 [[TMP1]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] -; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; PTR32-NEXT: [[CMP:%.*]] = icmp ult i8* [[GEP]], [[END]] +; PTR32-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] ; PTR32-NEXT: br label [[EXIT]] diff --git a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll --- a/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ b/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -5,31 +5,33 @@ define i32 @remove_loop(i32 %size) #0 { ; CHECK-V8M-LABEL: @remove_loop( ; CHECK-V8M-NEXT: entry: -; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] -; CHECK-V8M-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 -; CHECK-V8M-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 +; CHECK-V8M-NEXT: [[TMP0:%.*]] = sub i32 -1, [[SIZE:%.*]] +; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 -32) +; CHECK-V8M-NEXT: [[TMP1:%.*]] = add i32 [[SIZE]], [[UMAX]] +; CHECK-V8M-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 32 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 +; CHECK-V8M-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 5 ; CHECK-V8M-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8M: while.cond: ; CHECK-V8M-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8M: while.end: -; CHECK-V8M-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] -; CHECK-V8M-NEXT: ret i32 [[TMP4]] +; CHECK-V8M-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] +; CHECK-V8M-NEXT: ret i32 [[TMP5]] ; ; CHECK-V8A-LABEL: @remove_loop( ; CHECK-V8A-NEXT: entry: -; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] -; CHECK-V8A-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 -; CHECK-V8A-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 +; CHECK-V8A-NEXT: [[TMP0:%.*]] = sub i32 -1, [[SIZE:%.*]] +; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 -32) +; CHECK-V8A-NEXT: [[TMP1:%.*]] = add i32 [[SIZE]], [[UMAX]] +; CHECK-V8A-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 32 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 +; CHECK-V8A-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 5 ; CHECK-V8A-NEXT: br label [[WHILE_COND:%.*]] ; CHECK-V8A: while.cond: ; CHECK-V8A-NEXT: br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK-V8A: while.end: -; CHECK-V8A-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] -; CHECK-V8A-NEXT: ret i32 [[TMP4]] +; CHECK-V8A-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] +; CHECK-V8A-NEXT: ret i32 [[TMP5]] ; entry: br label %while.cond @@ -141,7 +143,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -167,7 +169,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -222,7 +224,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -247,7 +249,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -303,7 +305,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -334,7 +336,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -400,7 +402,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -436,7 +438,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -509,7 +511,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -518,7 +520,7 @@ ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -545,7 +547,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -554,7 +556,7 @@ ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -618,7 +620,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -627,7 +629,7 @@ ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -652,7 +654,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -661,7 +663,7 @@ ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -731,7 +733,7 @@ ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -759,7 +761,7 @@ ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll --- a/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -17,10 +17,10 @@ ; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i8 addrspace(2)* [[P_02]] to i8 ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8 addrspace(2)* [[BASE]] to i8 ; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i8 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] -; CHECK-NEXT: store i8 [[SUB_PTR_SUB]], i8 addrspace(2)* [[P_02]] +; CHECK-NEXT: store i8 [[SUB_PTR_SUB]], i8 addrspace(2)* [[P_02]], align 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8 addrspace(2)* [[P_02]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 addrspace(2)* [[INCDEC_PTR]], [[ADD_PTR]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 addrspace(2)* [[INCDEC_PTR]], [[ADD_PTR]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -64,10 +64,10 @@ ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8 addrspace(3)* [[BASE]] to i16 ; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i16 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] ; CHECK-NEXT: [[CONV:%.*]] = trunc i16 [[SUB_PTR_SUB]] to i8 -; CHECK-NEXT: store i8 [[CONV]], i8 addrspace(3)* [[P_02]] +; CHECK-NEXT: store i8 [[CONV]], i8 addrspace(3)* [[P_02]], align 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8 addrspace(3)* [[P_02]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8 addrspace(3)* [[INCDEC_PTR]], [[ADD_PTR]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 addrspace(3)* [[INCDEC_PTR]], [[ADD_PTR]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll --- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -29,8 +29,8 @@ ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8 ; CHECK-NEXT: store i8 [[CONV]], i8* [[P_02]], align 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_02]], i32 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[ADD_PTR]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[INCDEC_PTR]], [[ADD_PTR]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -304,15 +304,13 @@ ; CHECK-NEXT: [[CMP_PH:%.*]] = icmp ult i8* [[IVSTART]], [[IVEND]] ; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [256 x i8], [256 x i8]* [[BASE]], i64 0, i64 [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[IVSTART]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 ; CHECK-NEXT: store i8 3, i8* [[APTR]], align 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[SCEVGEP]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[INCDEC_PTR]], [[IVEND]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll --- a/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll +++ b/llvm/test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll @@ -6,11 +6,12 @@ define i32 @remove_loop(i32 %size) { ; CHECK-LABEL: @remove_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31) -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]] -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 5 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[SIZE:%.*]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 -32) +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SIZE]], [[UMAX]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 5 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 5 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: ; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] @@ -18,8 +19,8 @@ ; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]] -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: br label %while.cond diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -27,6 +27,7 @@ ; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[CONV17]], 0 ; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END12:%.*]] ; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[TMP2]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ] @@ -36,64 +37,67 @@ ; CHECK: for.body8.lr.ph: ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8 ; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 -; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[UMIN1]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP6]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[CONV3]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 -1, [[TMP5]] +; CHECK-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP3]], i32 [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[UMAX1]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP8]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP7:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP8]]) -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[UMIN]] -; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP9]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP10]]) +; CHECK-NEXT: [[TMP9:%.*]] = add i8 [[CONV3]], -1 +; CHECK-NEXT: [[TMP10:%.*]] = zext i8 [[TMP9]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 -1, [[TMP10]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP3]], i32 [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[UMAX]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 +; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP14]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[TMP7]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[TMP7]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i8 [[TMP12]], [[TMP7]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i8 [[TMP11]], [[TMP7]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 true, i1 [[TMP13]], i1 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[TMP9]], 255 -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP19:%.*]] = or i1 false, [[TMP18]] -; CHECK-NEXT: br i1 [[TMP19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = add i8 [[TMP9]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP16:%.*]] = sub i8 [[TMP9]], [[MUL_RESULT]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ugt i8 [[TMP16]], [[TMP9]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i8 [[TMP15]], [[TMP9]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp ugt i32 [[TMP13]], 255 +; CHECK-NEXT: [[TMP21:%.*]] = or i1 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP23:%.*]] = or i1 false, [[TMP22]] +; CHECK-NEXT: br i1 [[TMP23]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP6]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP6]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP8]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP8]], [[N_MOD_VF]] ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]] -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP20]], [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], -4 -; CHECK-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI2]], -; CHECK-NEXT: [[TMP26:%.*]] = add i8 [[TMP22]], -1 -; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1 -; CHECK-NEXT: [[TMP28:%.*]] = zext i8 [[TMP26]] to i32 -; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP24]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[INDEX]] to i8 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[OFFSET_IDX]], -4 +; CHECK-NEXT: [[TMP28]] = add <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP30:%.*]] = add i8 [[TMP26]], -1 +; CHECK-NEXT: [[TMP31:%.*]] = add i8 [[TMP27]], -1 +; CHECK-NEXT: [[TMP32:%.*]] = zext i8 [[TMP30]] to i32 +; CHECK-NEXT: [[TMP33:%.*]] = zext i8 [[TMP31]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP25]], [[TMP24]] -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP29]], [[TMP28]] +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] @@ -102,9 +106,9 @@ ; CHECK-NEXT: [[DEC]] = add i8 [[C_04]], -1 ; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[DEC]] to i32 ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.cond4.for.inc9_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 ; CHECK-NEXT: br label [[FOR_INC9]] ; CHECK: for.inc9: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1,3 +1,4 @@ +; XFAIL: * ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll --- a/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll @@ -11,10 +11,10 @@ define void @test(i64 %arg, i32 %arg1, i8** %base) { ; CHECK: LAA: Adding RT check for range: -; CHECK-NEXT: Start: ((8 * (zext i32 (-1 + %arg1) to i64)) + (8 * (1 smin %arg)) + (-8 * %arg) + %base) +; CHECK-NEXT: Start: (-8 + (8 * (zext i32 (-1 + %arg1) to i64)) + (-8 * ((-2 smax (-1 + (-1 * %arg))) + %arg)) + %base) ; CHECK-SAME: End: (8 + (8 * (zext i32 (-1 + %arg1) to i64)) + %base) ; CHECK-NEXT: LAA: Adding RT check for range: -; CHECK-NEXT: Start: ((8 * (1 smin %arg)) + %base) +; CHECK-NEXT: Start: (-8 + (-8 * (-2 smax (-1 + (-1 * %arg)))) + %base) ; CHECK-SAME: End: (8 + (8 * %arg) + %base) ; CHECK: vector.body