diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -564,6 +564,8 @@ const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty); const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + unsigned Depth = 0); const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getCastExpr(SCEVTypes Kind, const SCEV *Op, Type *Ty); const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); @@ -1219,6 +1221,45 @@ /// to be infinite, it must also be undefined. bool loopIsFiniteByAssumption(const Loop *L); + class FoldID { + SmallVector Bits; + + public: + void addInteger(unsigned long I) { Bits.push_back(I); } + void addInteger(unsigned I) { Bits.push_back(I); } + void addInteger(int I) { Bits.push_back(I); } + + void addInteger(unsigned long long I) { + addInteger(unsigned(I)); + addInteger(unsigned(I >> 32)); + } + + void addPointer(const void *Ptr) { + // Note: this adds pointers to the hash using sizes and endianness that + // depend on the host. It doesn't matter, however, because hashing on + // pointer values is inherently unstable. Nothing should depend on the + // ordering of nodes in the folding set. + static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long), + "unexpected pointer size"); + addInteger(reinterpret_cast(Ptr)); + } + + unsigned computeHash() const { + unsigned Hash = Bits.size(); + for (unsigned I = 0; I != Bits.size(); ++I) + Hash = detail::combineHashValue(Hash, Bits[I]); + return Hash; + } + bool operator==(const FoldID &RHS) const { + if (Bits.size() != RHS.Bits.size()) + return false; + for (unsigned I = 0; I != Bits.size(); ++I) + if (Bits[I] != RHS.Bits[I]) + return false; + return true; + } + }; + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1280,6 +1321,11 @@ /// This is a cache of the values we have analyzed so far. ValueExprMapType ValueExprMap; + /// This is a cache for expressions that got folded to a different existing + /// SCEV. + DenseMap FoldCache; + DenseMap> FoldCacheUser; + /// Mark predicate values currently being processed by isImpliedCond. SmallPtrSet PendingLoopPredicates; @@ -2307,6 +2353,28 @@ const SCEV *BackedgeCount = nullptr; }; +template <> struct DenseMapInfo { + static inline ScalarEvolution::FoldID getEmptyKey() { + ScalarEvolution::FoldID ID; + ID.addInteger(~0ULL); + return ID; + } + static inline ScalarEvolution::FoldID getTombstoneKey() { + ScalarEvolution::FoldID ID; + ID.addInteger(~0ULL - 1ULL); + return ID; + } + + static unsigned getHashValue(const ScalarEvolution::FoldID &Val) { + return Val.computeHash(); + } + + static bool isEqual(const ScalarEvolution::FoldID &LHS, + const ScalarEvolution::FoldID &RHS) { + return LHS == RHS; + } +}; + } // end namespace llvm #endif // LLVM_ANALYSIS_SCALAREVOLUTION_H diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1609,6 +1609,30 @@ assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); Ty = getEffectiveSCEVType(Ty); + FoldID ID; + ID.addInteger(scZeroExtend); + ID.addPointer(Op); + ID.addPointer(Ty); + auto Iter = FoldCache.find(ID); + if (Iter != FoldCache.end()) + return Iter->second; + + const SCEV *S = getZeroExtendExprImpl(Op, Ty, Depth); + if (!isa(S)) { + FoldCache.insert({ID, S}); + auto R = FoldCacheUser.insert({S, {}}); + R.first->second.push_back(ID); + } + return S; +} + +const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + unsigned Depth) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); + assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); + // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( @@ -8368,6 +8392,8 @@ HasRecMap.clear(); MinTrailingZerosCache.clear(); PredicatedSCEVRewrites.clear(); + FoldCache.clear(); + FoldCacheUser.clear(); } void ScalarEvolution::forgetLoop(const Loop *L) { @@ -13872,6 +13898,12 @@ forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt()); BECountUsers.erase(BEUsersIt); } + + auto FoldUser = FoldCacheUser.find(S); + if (FoldUser != FoldCacheUser.end()) + for (auto &KV : FoldUser->second) + FoldCache.erase(KV); + FoldCacheUser.erase(S); } void @@ -14178,6 +14210,35 @@ } } } + + // Verify FoldCache/FoldCacheUser caches. + for (auto [FoldID, Expr] : FoldCache) { + auto I = FoldCacheUser.find(Expr); + if (I == FoldCacheUser.end()) { + dbgs() << "Missing entry in FoldCacheUser for cached expression " << *Expr + << "!\n"; + std::abort(); + } + if (!is_contained(I->second, FoldID)) { + dbgs() << "Missing FoldID in cached users of " << *Expr << "!\n"; + std::abort(); + } + } + for (auto [Expr, IDs] : FoldCacheUser) { + for (auto &FoldID : IDs) { + auto I = FoldCache.find(FoldID); + if (I == FoldCache.end()) { + dbgs() << "Missing entry in FoldCache for expression " << *Expr + << "!\n"; + std::abort(); + } + if (I->second != Expr) { + dbgs() << "Entry in FoldCache doesn't match FoldCacheUser: " + << *I->second << " != " << *Expr << "!\n"; + std::abort(); + } + } + } } bool ScalarEvolution::invalidate( diff --git a/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll @@ -0,0 +1,131 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +define i32 @pr58402_large_number_of_zext(ptr %dst) { +; CHECK-LABEL: 'pr58402_large_number_of_zext' +; CHECK-NEXT: Classifying expressions for: @pr58402_large_number_of_zext +; CHECK-NEXT: %d.0 = phi i32 [ 0, %entry ], [ %add7.15, %header ] +; CHECK-NEXT: --> %d.0 U: [0,65) S: [0,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %b.0 = phi i32 [ 59, %entry ], [ %b.0, %header ] +; CHECK-NEXT: --> 59 U: [59,60) S: [59,60) Exits: 59 LoopDispositions: { %header: Invariant } +; CHECK-NEXT: %conv.neg = sext i1 %cmp to i32 +; CHECK-NEXT: --> (sext i1 %cmp to i32) U: [-1,1) S: [-1,1) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %conv = zext i1 %cmp to i32 +; CHECK-NEXT: --> (zext i1 %cmp to i32) U: [0,2) S: [0,2) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i = and i32 %conv, -2 +; CHECK-NEXT: --> (2 * ((zext i1 %cmp to i32) /u 2)) U: [0,1) S: [0,1) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7 = add i32 %i, 4 +; CHECK-NEXT: --> (4 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i1 = and i32 %add7, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2)) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.1 = add i32 %i1, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i2 = and i32 %add7.1, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2)) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.2 = add i32 %i2, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i3 = and i32 %add7.2, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2)) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.3 = add i32 %i3, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i4 = and i32 %add7.3, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.4 = add i32 %i4, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i5 = and i32 %add7.4, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.5 = add i32 %i5, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i6 = and i32 %add7.5, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.6 = add i32 %i6, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i7 = and i32 %add7.6, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.7 = add i32 %i7, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i8 = and i32 %add7.7, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.8 = add i32 %i8, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i9 = and i32 %add7.8, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.9 = add i32 %i9, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i10 = and i32 %add7.9, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.10 = add i32 %i10, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i11 = and i32 %add7.10, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.11 = add i32 %i11, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i12 = and i32 %add7.11, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.12 = add i32 %i12, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i13 = and i32 %add7.12, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.13 = add i32 %i13, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i14 = and i32 %add7.13, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.14 = add i32 %i14, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i15 = and i32 %add7.14, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.15 = add i32 %i15, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i16 = and i32 %add7.15, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: Determining loop execution counts for: @pr58402_large_number_of_zext +; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable predicated backedge-taken count. +; +entry: + br label %header + +header: + %d.0 = phi i32 [ 0, %entry ], [ %add7.15, %header ] + %b.0 = phi i32 [ 59, %entry ], [ %b.0, %header ] + %cmp = icmp slt i32 %b.0, 1 + %conv.neg = sext i1 %cmp to i32 + %conv = zext i1 %cmp to i32 + %i = and i32 %conv, -2 + %add7 = add i32 %i, 4 + %i1 = and i32 %add7, -2 + %add7.1 = add i32 %i1, 4 + %i2 = and i32 %add7.1, -2 + %add7.2 = add i32 %i2, 4 + %i3 = and i32 %add7.2, -2 + %add7.3 = add i32 %i3, 4 + %i4 = and i32 %add7.3, -2 + %add7.4 = add i32 %i4, 4 + %i5 = and i32 %add7.4, -2 + %add7.5 = add i32 %i5, 4 + %i6 = and i32 %add7.5, -2 + %add7.6 = add i32 %i6, 4 + %i7 = and i32 %add7.6, -2 + %add7.7 = add i32 %i7, 4 + %i8 = and i32 %add7.7, -2 + %add7.8 = add i32 %i8, 4 + %i9 = and i32 %add7.8, -2 + %add7.9 = add i32 %i9, 4 + %i10 = and i32 %add7.9, -2 + %add7.10 = add i32 %i10, 4 + %i11 = and i32 %add7.10, -2 + %add7.11 = add i32 %i11, 4 + %i12 = and i32 %add7.11, -2 + %add7.12 = add i32 %i12, 4 + %i13 = and i32 %add7.12, -2 + %add7.13 = add i32 %i13, 4 + %i14 = and i32 %add7.13, -2 + %add7.14 = add i32 %i14, 4 + %i15 = and i32 %add7.14, -2 + %add7.15 = add i32 %i15, 4 + %i16 = and i32 %add7.15, -2 + store i32 %add7.15, ptr %dst, align 4 + br label %header +} diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll --- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll @@ -928,27 +928,27 @@ ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]] ; CHECK: guarded: -; CHECK-NEXT: [[ICMP_USER:%.*]] = icmp ne i32 [[FOO]], [[X:%.*]] -; CHECK-NEXT: br i1 [[ICMP_USER]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[ICMP_USER_WIDE:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[ICMP_USER_WIDE]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: call void @use(i32 -1) -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; CHECK: failure: -; CHECK-NEXT: [[FOO_LCSSA2:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ] -; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA2]]) +; CHECK-NEXT: [[FOO_LCSSA2_WIDE:%.*]] = phi i64 [ [[TMP1]], [[BACKEDGE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[FOO_LCSSA2_WIDE]] to i32 +; CHECK-NEXT: call void @use(i32 [[TMP4]]) ; CHECK-NEXT: unreachable ; CHECK: side_exit: ; CHECK-NEXT: ret i32 0