diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -564,6 +564,8 @@ const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty); const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + unsigned Depth = 0); const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getCastExpr(SCEVTypes Kind, const SCEV *Op, Type *Ty); const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); @@ -1277,6 +1279,11 @@ /// This is a cache of the values we have analyzed so far. ValueExprMapType ValueExprMap; + /// This is a cache for ZExt expressions for a (SCEV, Ty) pair. + DenseMap, const SCEV *> ZExtCache; + DenseMap, 2>> + ZExtCacheUser; + /// Mark predicate values currently being processed by isImpliedCond. SmallPtrSet PendingLoopPredicates; diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1602,6 +1602,25 @@ assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); + + auto Key = std::make_pair(Op, Ty); + auto Iter = ZExtCache.insert({Key, nullptr}); + if (Iter.second || !Iter.first->second) { + auto *R = getZeroExtendExprImpl(Op, Ty, Depth); + ZExtCache[Key] = R; + + auto Users = ZExtCacheUser.insert({R, {}}); + Users.first->second.emplace_back(Op, Ty); + } + return ZExtCache[Key]; +} + +const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + unsigned Depth) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); + assert(!Op->getType()->isPointerTy() && "Can't extend pointer!"); Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. @@ -13719,6 +13738,11 @@ forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt()); BECountUsers.erase(BEUsersIt); } + + auto ZExtUser = ZExtCacheUser.find(S); + if (ZExtUser != ZExtCacheUser.end()) + for (auto &KV : ZExtUser->second) + ZExtCache.erase(KV); } void diff --git a/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr58402-large-number-of-zext-exprs.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s + +define i32 @pr58402_large_number_of_zext(ptr %dst) { +; CHECK-LABEL: 'pr58402_large_number_of_zext' +; CHECK-NEXT: Classifying expressions for: @pr58402_large_number_of_zext +; CHECK-NEXT: %d.0 = phi i32 [ 0, %entry ], [ %add7.15, %header ] +; CHECK-NEXT: --> %d.0 U: [0,65) S: [0,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %b.0 = phi i32 [ 59, %entry ], [ %b.0, %header ] +; CHECK-NEXT: --> 59 U: [59,60) S: [59,60) Exits: 59 LoopDispositions: { %header: Invariant } +; CHECK-NEXT: %conv.neg = sext i1 %cmp to i32 +; CHECK-NEXT: --> (sext i1 %cmp to i32) U: [-1,1) S: [-1,1) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %conv = zext i1 %cmp to i32 +; CHECK-NEXT: --> (zext i1 %cmp to i32) U: [0,2) S: [0,2) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i = and i32 %conv, -2 +; CHECK-NEXT: --> (2 * ((zext i1 %cmp to i32) /u 2)) U: [0,1) S: [0,1) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7 = add i32 %i, 4 +; CHECK-NEXT: --> (4 + (2 * ((zext i1 %cmp to i32) /u 2))) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i1 = and i32 %add7, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2)) U: [4,5) S: [4,5) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.1 = add i32 %i1, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i2 = and i32 %add7.1, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2)) U: [8,9) S: [8,9) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.2 = add i32 %i2, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i3 = and i32 %add7.2, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2)) U: [12,13) S: [12,13) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.3 = add i32 %i3, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i4 = and i32 %add7.3, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [16,17) S: [16,17) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.4 = add i32 %i4, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i5 = and i32 %add7.4, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [20,21) S: [20,21) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.5 = add i32 %i5, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i6 = and i32 %add7.5, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [24,25) S: [24,25) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.6 = add i32 %i6, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i7 = and i32 %add7.6, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [28,29) S: [28,29) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.7 = add i32 %i7, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i8 = and i32 %add7.7, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [32,33) S: [32,33) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.8 = add i32 %i8, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i9 = and i32 %add7.8, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [36,37) S: [36,37) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.9 = add i32 %i9, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i10 = and i32 %add7.9, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [40,41) S: [40,41) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.10 = add i32 %i10, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i11 = and i32 %add7.10, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [44,45) S: [44,45) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.11 = add i32 %i11, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i12 = and i32 %add7.11, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [48,49) S: [48,49) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.12 = add i32 %i12, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i13 = and i32 %add7.12, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [52,53) S: [52,53) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.13 = add i32 %i13, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i14 = and i32 %add7.13, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [56,57) S: [56,57) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.14 = add i32 %i14, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i15 = and i32 %add7.14, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [60,61) S: [60,61) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %add7.15 = add i32 %i15, 4 +; CHECK-NEXT: --> (4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: %i16 = and i32 %add7.15, -2 +; CHECK-NEXT: --> (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((4 + (2 * ((zext i1 %cmp to i32) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2))) /u 2)) U: [64,65) S: [64,65) Exits: <> LoopDispositions: { %header: Variant } +; CHECK-NEXT: Determining loop execution counts for: @pr58402_large_number_of_zext +; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable predicated backedge-taken count. +; +entry: + br label %header + +header: + %d.0 = phi i32 [ 0, %entry ], [ %add7.15, %header ] + %b.0 = phi i32 [ 59, %entry ], [ %b.0, %header ] + %cmp = icmp slt i32 %b.0, 1 + %conv.neg = sext i1 %cmp to i32 + %conv = zext i1 %cmp to i32 + %i = and i32 %conv, -2 + %add7 = add i32 %i, 4 + %i1 = and i32 %add7, -2 + %add7.1 = add i32 %i1, 4 + %i2 = and i32 %add7.1, -2 + %add7.2 = add i32 %i2, 4 + %i3 = and i32 %add7.2, -2 + %add7.3 = add i32 %i3, 4 + %i4 = and i32 %add7.3, -2 + %add7.4 = add i32 %i4, 4 + %i5 = and i32 %add7.4, -2 + %add7.5 = add i32 %i5, 4 + %i6 = and i32 %add7.5, -2 + %add7.6 = add i32 %i6, 4 + %i7 = and i32 %add7.6, -2 + %add7.7 = add i32 %i7, 4 + %i8 = and i32 %add7.7, -2 + %add7.8 = add i32 %i8, 4 + %i9 = and i32 %add7.8, -2 + %add7.9 = add i32 %i9, 4 + %i10 = and i32 %add7.9, -2 + %add7.10 = add i32 %i10, 4 + %i11 = and i32 %add7.10, -2 + %add7.11 = add i32 %i11, 4 + %i12 = and i32 %add7.11, -2 + %add7.12 = add i32 %i12, 4 + %i13 = and i32 %add7.12, -2 + %add7.13 = add i32 %i13, 4 + %i14 = and i32 %add7.13, -2 + %add7.14 = add i32 %i14, 4 + %i15 = and i32 %add7.14, -2 + %add7.15 = add i32 %i15, 4 + %i16 = and i32 %add7.15, -2 + store i32 %add7.15, ptr %dst, align 4 + br label %header +} diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll --- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll @@ -928,27 +928,27 @@ ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]] ; CHECK: guarded: -; CHECK-NEXT: [[ICMP_USER:%.*]] = icmp ne i32 [[FOO]], [[X:%.*]] -; CHECK-NEXT: br i1 [[ICMP_USER]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[ICMP_USER_WIDE:%.*]] = icmp ne i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: br i1 [[ICMP_USER_WIDE]], label [[BACKEDGE]], label [[SIDE_EXIT:%.*]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: call void @use(i32 -1) -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; CHECK: failure: -; CHECK-NEXT: [[FOO_LCSSA2:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ] -; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA2]]) +; CHECK-NEXT: [[FOO_LCSSA2_WIDE:%.*]] = phi i64 [ [[TMP1]], [[BACKEDGE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[FOO_LCSSA2_WIDE]] to i32 +; CHECK-NEXT: call void @use(i32 [[TMP4]]) ; CHECK-NEXT: unreachable ; CHECK: side_exit: ; CHECK-NEXT: ret i32 0