diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1071,10 +1071,17 @@
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
     return S;
 
+  // It isn't legal for optimizations to construct new ptrtoint expressions
+  // for non-integral pointers.
+  if (getDataLayout().isNonIntegralPointerType(Op->getType()))
+    return getCouldNotCompute();
+
   Type *IntPtrTy = getDataLayout().getIntPtrType(Op->getType());
 
-  // We can only model ptrtoint if SCEV's effective (integer) type
+  // We can only trivially model ptrtoint if SCEV's effective (integer) type
   // is sufficiently wide to represent all possible pointer values.
+  // We could theoretically teach SCEV to truncate wider pointers, but
+  // that isn't implemented for now.
   if (getDataLayout().getTypeSizeInBits(getEffectiveSCEVType(Op->getType())) !=
       getDataLayout().getTypeSizeInBits(IntPtrTy))
     return getCouldNotCompute();
@@ -7527,6 +7534,10 @@
   for (auto *PredSet : PredSetList)
     for (auto *P : *PredSet)
       addPredicate(P);
+  assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) &&
+         "Backedge count should be int");
+  assert((isa<SCEVCouldNotCompute>(M) || !M->getType()->isPointerTy()) &&
+         "Max backedge count should be int");
 }
 
 ScalarEvolution::ExitLimit::ExitLimit(
@@ -7952,6 +7963,16 @@
   switch (Pred) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
+    if (LHS->getType()->isPointerTy()) {
+      LHS = getLosslessPtrToIntExpr(LHS);
+      if (isa<SCEVCouldNotCompute>(LHS))
+        return LHS;
+    }
+    if (RHS->getType()->isPointerTy()) {
+      RHS = getLosslessPtrToIntExpr(RHS);
+      if (isa<SCEVCouldNotCompute>(RHS))
+        return RHS;
+    }
     ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
                                 AllowPredicates);
     if (EL.hasAnyInfo()) return EL;
@@ -11515,6 +11536,22 @@
   }
 
   const SCEV *Start = IV->getStart();
+
+  // Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
+  // Use integer-typed versions for actual computation.
+  const SCEV *OrigStart = Start;
+  const SCEV *OrigRHS = RHS;
+  if (Start->getType()->isPointerTy()) {
+    Start = getLosslessPtrToIntExpr(Start);
+    if (isa<SCEVCouldNotCompute>(Start))
+      return Start;
+  }
+  if (RHS->getType()->isPointerTy()) {
+    RHS = getLosslessPtrToIntExpr(RHS);
+    if (isa<SCEVCouldNotCompute>(RHS))
+      return RHS;
+  }
+
   const SCEV *End = RHS;
   // When the RHS is not invariant, we do not know the end bound of the loop and
   // cannot calculate the ExactBECount needed by ExitLimit. However, we can
@@ -11541,13 +11578,13 @@
   // result is as above, and if not max(End,Start) is Start so we get a backedge
   // count of zero.
   const SCEV *BECount;
-  if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+  if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(OrigStart, Stride), OrigRHS))
     BECount = BECountIfBackedgeTaken;
   else {
     // If we know that RHS >= Start in the context of loop, then we know that
     // max(RHS, Start) = RHS at this point.
     if (isLoopEntryGuardedByCond(
-            L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, RHS, Start))
+            L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, OrigRHS, OrigStart))
       End = RHS;
     else
       End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
@@ -11626,6 +11663,17 @@
       End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
   }
 
+  if (Start->getType()->isPointerTy()) {
+    Start = getLosslessPtrToIntExpr(Start);
+    if (isa<SCEVCouldNotCompute>(Start))
+      return Start;
+  }
+  if (End->getType()->isPointerTy()) {
+    End = getLosslessPtrToIntExpr(End);
+    if (isa<SCEVCouldNotCompute>(End))
+      return End;
+  }
+
   const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride);
 
   APInt MaxStart = IsSigned ? getSignedRangeMax(Start)
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -910,13 +910,13 @@
 ; CHECK-NEXT:    %incdec.ptr112 = getelementptr inbounds i8, i8* %text.addr.5, i64 -1
 ; CHECK-NEXT:    -->  {(-1 + null),+,-1}<%while.cond111> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %while.cond111: Computable, %while.body: Variant }
 ; CHECK-NEXT:    %lastout.2271 = phi i8* [ %incdec.ptr126, %while.body125 ], [ %ptr, %while.end117 ]
-; CHECK-NEXT:    -->  {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {(-2 + null),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT:    -->  {%ptr,+,1}<%while.body125> U: full-set S: full-set Exits: {(-2 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT:    %incdec.ptr126 = getelementptr inbounds i8, i8* %lastout.2271, i64 1
-; CHECK-NEXT:    -->  {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + null),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
+; CHECK-NEXT:    -->  {(1 + %ptr),+,1}<%while.body125> U: [1,0) S: [1,0) Exits: {(-1 + (-1 * (ptrtoint i8* %ptr to i64)) + %ptr),+,-1}<%while.cond111> LoopDispositions: { %while.body125: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @crash
-; CHECK-NEXT:  Loop %while.body125: backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<%while.cond111>
+; CHECK-NEXT:  Loop %while.body125: backedge-taken count is {(-2 + (-1 * (ptrtoint i8* %ptr to i64))),+,-1}<%while.cond111>
 ; CHECK-NEXT:  Loop %while.body125: max backedge-taken count is -2
-; CHECK-NEXT:  Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * %ptr) + null),+,-1}<%while.cond111>
+; CHECK-NEXT:  Loop %while.body125: Predicated backedge-taken count is {(-2 + (-1 * (ptrtoint i8* %ptr to i64))),+,-1}<%while.cond111>
 ; CHECK-NEXT:   Predicates:
 ; CHECK:       Loop %while.body125: Trip multiple is 1
 ; CHECK-NEXT:  Loop %while.cond111: Unpredictable backedge-taken count.
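Illustrative aside (not part of the patch): a minimal IR sketch, using hypothetical names, of the kind of pointer-compare loop these changes target. With the ICMP_NE handling above, both sides of the exit comparison are run through getLosslessPtrToIntExpr before howFarToZero, so the backedge-taken count is expected to come out in terms of (ptrtoint i8* %end to i64) and (ptrtoint i8* %begin to i64) rather than as pointer-typed arithmetic, along the lines of the CHECK updates in the tests that follow.

define void @zero_bytes(i8* %begin, i8* %end) {
entry:
  ; guard so the loop body can assume %begin != %end
  %empty = icmp eq i8* %begin, %end
  br i1 %empty, label %exit, label %loop

loop:
  ; %p walks from %begin towards %end one byte at a time
  %p = phi i8* [ %begin, %entry ], [ %p.next, %loop ]
  store i8 0, i8* %p, align 1
  %p.next = getelementptr inbounds i8, i8* %p, i64 1
  %done = icmp eq i8* %p.next, %end
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

In the trip-count code above, the pointer-typed Start and RHS are additionally kept around as OrigStart/OrigRHS so isLoopEntryGuardedByCond still sees the original pointer expressions, while the ptrtoint forms feed the actual backedge-count arithmetic.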
diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
--- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
+++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
@@ -93,18 +93,20 @@
 }
 
 define void @pointer_iv_nowrap(i8* %startptr, i8* %endptr) local_unnamed_addr {
-; CHECK-LABEL: Classifying expressions for: @pointer_iv_nowrap
-; CHECK-NEXT:    %init = getelementptr inbounds i8, i8* %startptr, i64 2000
-; CHECK-NEXT:    --> (2000 + %startptr) U: [2000,0) S: [2000,0)
-; CHECK-NEXT:    %iv = phi i8* [ %init, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {(2000 + %startptr),+,1}<%loop> U: [2000,0) S: [2000,0)
-; CHECK-NEXT:    %iv.next = getelementptr inbounds i8, i8* %iv, i64 1
-; CHECK-NEXT:    --> {(2001 + %startptr),+,1}<%loop> U: [2001,0) S: [2001,0)
-
-; CHECK-NEXT:Determining loop execution counts for: @pointer_iv_nowrap
-; CHECK-NEXT:Loop %loop: backedge-taken count is (-2000 + (-1 * %startptr) + ((2000 + %startptr) umax %endptr))
-; CHECK-NEXT:Loop %loop: max backedge-taken count is -2001
-; CHECK-NEXT:Loop %loop: Predicated backedge-taken count is (-2000 + (-1 * %startptr) + ((2000 + %startptr) umax %endptr))
+; CHECK-LABEL: 'pointer_iv_nowrap'
+; CHECK-NEXT:  Classifying expressions for: @pointer_iv_nowrap
+; CHECK-NEXT:    %init = getelementptr inbounds i8, i8* %startptr, i64 2000
+; CHECK-NEXT:    --> (2000 + %startptr) U: [2000,0) S: [2000,0)
+; CHECK-NEXT:    %iv = phi i8* [ %init, %entry ], [ %iv.next, %loop ]
+; CHECK-NEXT:    --> {(2000 + %startptr),+,1}<%loop> U: [2000,0) S: [2000,0) Exits: ((-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64)) + %startptr) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    %iv.next = getelementptr inbounds i8, i8* %iv, i64 1
+; CHECK-NEXT:    --> {(2001 + %startptr),+,1}<%loop> U: [2001,0) S: [2001,0) Exits: (1 + (-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64)) + %startptr) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @pointer_iv_nowrap
+; CHECK-NEXT:  Loop %loop: backedge-taken count is (-2000 + (-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64)))
+; CHECK-NEXT:  Loop %loop: max backedge-taken count is -2001
+; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is (-2000 + (-1 * (ptrtoint i8* %startptr to i64)) + ((2000 + (ptrtoint i8* %startptr to i64)) umax (ptrtoint i8* %endptr to i64)))
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %loop: Trip multiple is 1
 ;
 entry:
   %init = getelementptr inbounds i8, i8* %startptr, i64 2000
@@ -119,3 +121,8 @@
 end:
   ret void
 }
+
+define i32 @dummy(i32 %start, i32* %p, i32* %q) {
+entry:
+  ret i32 0
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
--- a/llvm/test/Analysis/ScalarEvolution/nsw.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -102,7 +102,7 @@
   store i32 0, i32* %__first.addr.08.i.i, align 4
   %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
   br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
-; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4)
+; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %begin to i64)) + (ptrtoint i32* %end to i64)) /u 4)
 ; CHECK: Loop %for.body.i.i: max backedge-taken count is 
4611686018427387903 _ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry ret void @@ -147,7 +147,7 @@ } ; CHECK-LABEL: PR12376 -; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax %arg1)) /u 4)) + %arg) +; CHECK: --> {(4 + %arg),+,4}<%bb2>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (4 + (4 * ((-1 + (-1 * (ptrtoint i32* %arg to i64)) + ((4 + (ptrtoint i32* %arg to i64)) umax (ptrtoint i32* %arg1 to i64))) /u 4)) + %arg) define void @PR12376(i32* nocapture %arg, i32* nocapture %arg1) { bb: br label %bb2 diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -380,25 +380,25 @@ ; X64-NEXT: %i4 = ptrtoint i8* %arg to i64 ; X64-NEXT: --> (ptrtoint i8* %arg to i64) U: full-set S: full-set ; X64-NEXT: %i7 = phi i8* [ %arg, %bb3 ], [ %i14, %bb6 ] -; X64-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + %arg1) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i8 = load i8, i8* %i7, align 1 ; X64-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i9 = ptrtoint i8* %i7 to i64 -; X64-NEXT: --> {(ptrtoint i8* %arg to i64),+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * %arg) + (ptrtoint i8* %arg to i64) + %arg1) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(ptrtoint i8* %arg to i64),+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint i8* %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * %arg) + %arg1) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, i8* %arg2, i64 %i10 -; X64-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * %arg) + %arg1 + %arg2) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, i8* %i11, align 1 ; X64-NEXT: --> %i12 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i13 = add i8 %i12, %i8 ; X64-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: ((-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char -; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1) -; X64-NEXT: Loop %bb6: max backedge-taken count is -2 -; X64-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * %arg) + %arg1) +; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) +; 
X64-NEXT: Loop %bb6: max backedge-taken count is -1 +; X64-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i64)) + (ptrtoint i8* %arg1 to i64)) ; X64-NEXT: Predicates: ; X64: Loop %bb6: Trip multiple is 1 ; @@ -407,25 +407,25 @@ ; X32-NEXT: %i4 = ptrtoint i8* %arg to i64 ; X32-NEXT: --> (zext i32 (ptrtoint i8* %arg to i32) to i64) U: [0,4294967296) S: [0,4294967296) ; X32-NEXT: %i7 = phi i8* [ %arg, %bb3 ], [ %i14, %bb6 ] -; X32-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + %arg1) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i8 = load i8, i8* %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint i8* %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint i8* %arg to i32) to i64),+,1}<%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i8* (-1 + (-1 * %arg) + %arg1) to i64) + (zext i32 (ptrtoint i8* %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint i8* %arg to i32) to i64),+,1}<%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) to i64) + (zext i32 (ptrtoint i8* %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i8* (-1 + (-1 * %arg) + %arg1) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, i8* %arg2, i64 %i10 -; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * %arg) + %arg1 + %arg2) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, i8* %i11, align 1 ; X32-NEXT: --> %i12 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i13 = add i8 %i12, %i8 ; X32-NEXT: --> (%i12 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i14 = getelementptr inbounds i8, i8* %i7, i64 1 -; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: %arg1 LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(1 + %arg),+,1}<%bb6> U: [1,0) S: [1,0) Exits: ((-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char -; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * %arg) + %arg1) -; X32-NEXT: Loop %bb6: max backedge-taken count is -2 -; X32-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * %arg) + %arg1) +; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) +; X32-NEXT: Loop %bb6: max backedge-taken count is -1 +; X32-NEXT: Loop %bb6: Predicated backedge-taken count is (-1 + (-1 * (ptrtoint i8* %arg to i32)) + (ptrtoint i8* %arg1 to i32)) ; X32-NEXT: Predicates: ; X32: Loop %bb6: Trip multiple is 1 ; @@ -465,13 
+465,13 @@ ; X64-NEXT: %i4 = ptrtoint i32* %arg to i64 ; X64-NEXT: --> (ptrtoint i32* %arg to i64) U: full-set S: full-set ; X64-NEXT: %i7 = phi i32* [ %arg, %bb3 ], [ %i15, %bb6 ] -; X64-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i8 = load i32, i32* %i7, align 4 ; X64-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i9 = ptrtoint i32* %i7 to i64 -; X64-NEXT: --> {(ptrtoint i32* %arg to i64),+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + (ptrtoint i32* %arg to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(ptrtoint i32* %arg to i64),+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + (ptrtoint i32* %arg to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,4}<%bb6> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,4}<%bb6> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = ashr exact i64 %i10, 2 ; X64-NEXT: --> %i11 U: [-2305843009213693952,2305843009213693952) S: [-2305843009213693952,2305843009213693952) Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 @@ -481,11 +481,11 @@ ; X64-NEXT: %i14 = add nsw i32 %i13, %i8 ; X64-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X64-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_int -; X64-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X64-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4) ; X64-NEXT: Loop %bb6: max backedge-taken count is 4611686018427387903 -; X64-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X64-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i64)) + (ptrtoint i32* %arg1 to i64)) /u 4) ; X64-NEXT: Predicates: ; X64: Loop %bb6: Trip multiple is 1 ; @@ -494,13 +494,13 @@ ; X32-NEXT: %i4 = ptrtoint i32* %arg to i64 ; X32-NEXT: --> (zext i32 (ptrtoint i32* %arg to i32) to i64) U: [0,4294967296) S: [0,4294967296) ; X32-NEXT: %i7 = phi i32* [ %arg, %bb3 ], [ %i15, %bb6 ] -; X32-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg,+,4}<%bb6> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint 
i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i8 = load i32, i32* %i7, align 4 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint i32* %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint i32* %arg to i32) to i64),+,4}<%bb6> U: [0,8589934588) S: [0,8589934588) Exits: ((zext i32 (ptrtoint i32* %arg to i32) to i64) + (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4))) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint i32* %arg to i32) to i64),+,4}<%bb6> U: [0,8589934588) S: [0,8589934588) Exits: ((zext i32 (ptrtoint i32* %arg to i32) to i64) + (4 * ((zext i32 (-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) to i64) /u 4))) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,4}<%bb6> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((zext i32* (-4 + (-1 * %arg) + %arg1) to i64) /u 4)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,4}<%bb6> U: [0,4294967293) S: [0,4294967293) Exits: (4 * ((zext i32 (-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) to i64) /u 4)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = ashr exact i64 %i10, 2 ; X32-NEXT: --> %i11 U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11 @@ -510,11 +510,11 @@ ; X32-NEXT: %i14 = add nsw i32 %i13, %i8 ; X32-NEXT: --> (%i13 + %i8) U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i15 = getelementptr inbounds i32, i32* %i7, i64 1 -; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * %arg) + %arg1) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(4 + %arg),+,4}<%bb6> U: [4,0) S: [4,0) Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4)) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_int -; X32-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X32-NEXT: Loop %bb6: backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4) ; X32-NEXT: Loop %bb6: max backedge-taken count is 1073741823 -; X32-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * %arg) + %arg1) /u 4) +; X32-NEXT: Loop %bb6: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint i32* %arg to i32)) + (ptrtoint i32* %arg1 to i32)) /u 4) ; X32-NEXT: Predicates: ; X32: Loop %bb6: Trip multiple is 1 ; diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll --- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll +++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll @@ -12,17 +12,19 @@ define i8 @testnullptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-LABEL: @testnullptrptr( +; PTR64-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64 ; PTR64-NEXT: br label [[LOOPGUARD:%.*]] ; PTR64: loopguard: -; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END:%.*]] +; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[END1]] ; PTR64-NEXT: br label [[LOOP:%.*]] 
; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -32,17 +34,19 @@ ; PTR64-NEXT: ret i8 [[RET]] ; ; PTR32-LABEL: @testnullptrptr( +; PTR32-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i32 ; PTR32-NEXT: br label [[LOOPGUARD:%.*]] ; PTR32: loopguard: -; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END:%.*]] +; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* null, [[END]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i32 [[END1]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: [[P_01_US_US:%.*]] = phi i8* [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -75,17 +79,21 @@ define i8 @testptrptr(i8* %buf, i8* %end) nounwind { ; PTR64-LABEL: @testptrptr( +; PTR64-NEXT: [[BUF2:%.*]] = ptrtoint i8* [[BUF:%.*]] to i64 +; PTR64-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64 ; PTR64-NEXT: br label [[LOOPGUARD:%.*]] ; PTR64: loopguard: -; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] +; PTR64-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF]], [[END]] ; PTR64-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR64: preheader: +; PTR64-NEXT: [[TMP1:%.*]] = sub i64 [[END1]], [[BUF2]] +; PTR64-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[BUF]], i64 [[TMP1]] ; PTR64-NEXT: br label [[LOOP:%.*]] ; PTR64: loop: ; PTR64-NEXT: [[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR64-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR64-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR64-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR64-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR64: exit.loopexit: ; PTR64-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -95,17 +103,21 @@ ; PTR64-NEXT: ret i8 [[RET]] ; ; PTR32-LABEL: @testptrptr( +; PTR32-NEXT: [[BUF2:%.*]] = ptrtoint i8* [[BUF:%.*]] to i32 +; PTR32-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i32 ; PTR32-NEXT: br label [[LOOPGUARD:%.*]] ; PTR32: loopguard: -; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF:%.*]], [[END:%.*]] +; PTR32-NEXT: [[GUARD:%.*]] = icmp ult i8* [[BUF]], [[END]] ; PTR32-NEXT: br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; PTR32: preheader: +; PTR32-NEXT: [[TMP1:%.*]] = sub i32 [[END1]], [[BUF2]] +; PTR32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[BUF]], i32 [[TMP1]] ; PTR32-NEXT: br label [[LOOP:%.*]] ; PTR32: loop: ; PTR32-NEXT: 
[[P_01_US_US:%.*]] = phi i8* [ [[BUF]], [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ] ; PTR32-NEXT: [[GEP]] = getelementptr inbounds i8, i8* [[P_01_US_US]], i64 1 ; PTR32-NEXT: [[SNEXT:%.*]] = load i8, i8* [[GEP]], align 1 -; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[END]] +; PTR32-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[GEP]], [[SCEVGEP]] ; PTR32-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; PTR32: exit.loopexit: ; PTR32-NEXT: [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ] @@ -306,10 +318,7 @@ ; PTR64: for.body.preheader: ; PTR64-NEXT: br label [[FOR_BODY:%.*]] ; PTR64: for.body: -; PTR64-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] -; PTR64-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR64-NEXT: [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]] -; PTR64-NEXT: br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] +; PTR64-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR64: for.end1609.loopexit: ; PTR64-NEXT: br label [[FOR_END1609]] ; PTR64: for.end1609: @@ -325,10 +334,7 @@ ; PTR32: for.body.preheader: ; PTR32-NEXT: br label [[FOR_BODY:%.*]] ; PTR32: for.body: -; PTR32-NEXT: [[R_17193:%.*]] = phi i8* [ [[INCDEC_PTR1608:%.*]], [[FOR_BODY]] ], [ null, [[FOR_BODY_PREHEADER]] ] -; PTR32-NEXT: [[INCDEC_PTR1608]] = getelementptr i8, i8* [[R_17193]], i64 1 -; PTR32-NEXT: [[CMP1604:%.*]] = icmp ult i8* [[INCDEC_PTR1608]], [[ADD_PTR1603]] -; PTR32-NEXT: br i1 [[CMP1604]], label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] +; PTR32-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_END1609_LOOPEXIT:%.*]] ; PTR32: for.end1609.loopexit: ; PTR32-NEXT: br label [[FOR_END1609]] ; PTR32: for.end1609: diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll @@ -12,13 +12,15 @@ define void @foo() { ; CHECK-LABEL: @foo( ; CHECK-NEXT: bb: +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 add (i64 ptrtoint ([0 x i8]* @global to i64), i64 500), i64 add (i64 ptrtoint ([0 x i8]* @global to i64), i64 1)) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 add (i64 ptrtoint ([0 x i8]* @global to i64), i64 1), [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP:%.*]] = phi i8* [ [[TMP4:%.*]], [[BB7:%.*]] ], [ getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 2), [[BB:%.*]] ] ; CHECK-NEXT: [[TMP4]] = getelementptr inbounds i8, i8* [[TMP]], i64 -1 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP4]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8* [[TMP4]], getelementptr inbounds ([0 x i8], [0 x i8]* @global, i64 0, i64 500) -; CHECK-NEXT: br i1 [[TMP5]], label [[BB7]], label [[BB11:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[BB7]], label [[BB11:%.*]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 ; CHECK-NEXT: br i1 true, label [[BB11]], label [[BB3]] diff --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll --- a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll @@ -16,15 +16,14 @@ define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) { ; 
CHECK-LABEL: @_Z15my_basic_memsetPcS_c( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR1:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64, !dbg [[DBG15:![0-9]+]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END:%.*]], !dbg [[DBG15]] +; CHECK-NEXT: [[PTR2:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64, !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: [[END1:%.*]] = ptrtoint i8* [[END:%.*]] to i64, !dbg [[DBG15]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END]], !dbg [[DBG15]] ; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15]] ; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg [[DBG16:![0-9]+]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[PTR1]], !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 [[TMP0]], !dbg [[DBG17]] -; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64, !dbg [[DBG17]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[SCEVGEP2]], i1 false), !dbg [[DBG18:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[PTR2]], !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[TMP0]], i1 false), !dbg [[DBG18:![0-9]+]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg [[DBG17]] ; CHECK: for.body: ; CHECK-NEXT: [[PTR_ADDR_04:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg [[DBG19:![0-9]+]] diff --git a/llvm/test/Transforms/LoopReroll/ptrindvar.ll b/llvm/test/Transforms/LoopReroll/ptrindvar.ll --- a/llvm/test/Transforms/LoopReroll/ptrindvar.ll +++ b/llvm/test/Transforms/LoopReroll/ptrindvar.ll @@ -14,10 +14,10 @@ ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] ;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %indvar -;CHECK-NEXT: %4 = load i32, i32* %scevgep, align 4 -;CHECK-NEXT: %add = add nsw i32 %4, %S.011 +;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %5, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %4 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] @@ -52,12 +52,12 @@ ;CHECK-LABEL: while.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] ;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] -;CHECK-NEXT: %4 = mul nsw i64 %indvar, -1 -;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %4 -;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 -;CHECK-NEXT: %add = add nsw i32 %5, %S.011 +;CHECK-NEXT: %5 = mul nsw i64 %indvar, -1 +;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5 +;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %6, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %4 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll 
b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model-assert.ll @@ -16,8 +16,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] ; CHECK: if.then: -; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 4) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i32 0 @@ -25,68 +34,68 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw <4 x i32> [[TMP4]], -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[P:%.*]], align 1, !tbaa [[TBAA1:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, i8* null, i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT3]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shl nuw <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[P:%.*]], align 1, !tbaa [[TBAA1:![0-9]+]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT4]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[P]], align 1, 
!tbaa [[TBAA1]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP13]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw <4 x i32> [[TMP10]], -; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw <4 x i32> [[TMP11]], -; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP12]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP15]], zeroinitializer -; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], i32* undef, align 4, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 -; CHECK-NEXT: store i32 [[TMP23]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 -; CHECK-NEXT: store i32 [[TMP24]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 -; CHECK-NEXT: store i32 [[TMP25]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT5]] to <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw <4 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <4 x i32> [[TMP15]], +; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP16]], [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = or <4 x i32> [[TMP17]], [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i32> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = or <4 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0 +; CHECK-NEXT: store i32 [[TMP26]], i32* undef, align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1 ; CHECK-NEXT: store i32 [[TMP27]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP21]], i32 2 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2 ; CHECK-NEXT: store i32 [[TMP28]], i32* undef, align 4, !tbaa [[TBAA4]] -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP21]], i32 3 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3 ; CHECK-NEXT: store i32 [[TMP29]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> 
[[TMP25]], i32 0 +; CHECK-NEXT: store i32 [[TMP30]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP25]], i32 1 +; CHECK-NEXT: store i32 [[TMP31]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP25]], i32 2 +; CHECK-NEXT: store i32 [[TMP32]], i32* undef, align 4, !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP25]], i32 3 +; CHECK-NEXT: store i32 [[TMP33]], i32* undef, align 4, !tbaa [[TBAA4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[SW_EPILOG:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ null, [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[IF_THEN]] ] ; CHECK-NEXT: br label [[FOR_BODY68:%.*]] ; CHECK: for.body68: ; CHECK-NEXT: [[P_359:%.*]] = phi i8* [ [[ADD_PTR86:%.*]], [[FOR_BODY68]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[CONV70:%.*]] = zext i8 [[X]] to i32 ; CHECK-NEXT: [[SHL71:%.*]] = shl nuw i32 [[CONV70]], 24 -; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] -; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP31]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[P]], align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[CONV73:%.*]] = zext i8 [[TMP35]] to i32 ; CHECK-NEXT: [[SHL74:%.*]] = shl nuw nsw i32 [[CONV73]], 16 ; CHECK-NEXT: [[OR75:%.*]] = or i32 [[SHL74]], [[SHL71]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] +; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* undef, align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: [[SHL78:%.*]] = shl nuw nsw i32 undef, 8 ; CHECK-NEXT: [[OR79:%.*]] = or i32 [[OR75]], [[SHL78]] ; CHECK-NEXT: [[CONV81:%.*]] = zext i8 undef to i32 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -12,13 +12,11 @@ ; CHECK-NEXT: br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[B1]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP0]] -; CHECK-NEXT: [[EXITCOUNT_PTRCNT_TO_INT:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[EXITCOUNT_PTRCNT_TO_INT]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], -1 ; CHECK-NEXT: 
[[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -68,7 +66,7 @@ ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 -4 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[EXITCOUNT_PTRCNT_TO_INT]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll --- a/llvm/test/Transforms/LoopVectorize/pr45259.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll @@ -6,7 +6,7 @@ define i8 @widget(i8* %arr, i8 %t9) { ; CHECK-LABEL: @widget( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARR1:%.*]] = ptrtoint i8* [[ARR:%.*]] to i64 +; CHECK-NEXT: [[ARR2:%.*]] = ptrtoint i8* [[ARR:%.*]] to i64 ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: ; CHECK-NEXT: [[T1_0:%.*]] = phi i8* [ [[ARR]], [[BB:%.*]] ], [ null, [[BB6]] ] @@ -14,30 +14,29 @@ ; CHECK-NEXT: br i1 [[C]], label [[FOR_PREHEADER:%.*]], label [[BB6]] ; CHECK: for.preheader: ; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi i8* [ [[T1_0]], [[BB6]] ] -; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint i8* [[T1_0_LCSSA]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ARR1]] to i32 +; CHECK-NEXT: [[T1_0_LCSSA1:%.*]] = ptrtoint i8* [[T1_0_LCSSA]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[T1_0_LCSSA1]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[ARR2]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 -1, [[ARR1]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[T1_0_LCSSA]], i64 [[TMP3]] -; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[SCEVGEP2]] to i8 -; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP4]]) +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[T1_0_LCSSA1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[ARR2]] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8 +; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP5]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = add i8 1, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i8 1, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i8 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP5]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = select i1 false, i1 [[TMP7]], i1 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[SCEVGEP2]], 255 -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = add i8 1, [[MUL_RESULT]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i8 1, [[MUL_RESULT]] +; CHECK-NEXT: 
[[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = select i1 false, i1 [[TMP8]], i1 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP4]], 255 +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]] +; CHECK-NEXT: [[TMP14:%.*]] = or i1 false, [[TMP13]] +; CHECK-NEXT: br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] @@ -48,18 +47,18 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i8> [[TMP14]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[ARR]], i8 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp slt <4 x i8> [[TMP14]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i1> [[TMP17]] to <4 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[TMP18]], <4 x i8>* [[TMP20]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i8> [[VEC_IND]], +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* [[ARR]], i8 [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp slt <4 x i8> [[TMP15]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i1> [[TMP18]] to <4 x i8> +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP19]], <4 x i8>* [[TMP21]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll --- a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll +++ b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll @@ -115,7 +115,7 @@ declare void @llvm.experimental.guard(i1, ...) ; This tests getRangeRef acts as intended with different idx size. -; CHECK: max backedge-taken count is 41 +; CHECK: Loop %loop: Unpredictable max backedge-taken count. define void @test_range_ref1(i8 %t) { entry: %t.ptr = inttoptr i8 %t to i8*