Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1459,6 +1459,13 @@
            SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>>
       LoopDispositions;
 
+  /// Cache for \c isGuaranteedToTransferExecutionToSuccessor(BB)
+  DenseMap<const BasicBlock *, bool> BlockTransferExecutionToSuccessorCache;
+
+  /// A version of the ValueTracking routine, but cached for efficiency, and
+  /// restricted to blocks inside a Loop.
+  bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB);
+
   struct LoopProperties {
     /// Set to true if the loop contains no instruction that can abnormally exit
     /// the loop (i.e. via throwing an exception, by terminating the thread
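The cached query declared above is implemented in ScalarEvolution.cpp below as a straightforward memoization of the ValueTracking routine, at the cost of two map lookups per query. If the duplicated lookup ever showed up in profiles, a single-lookup variant using DenseMap::try_emplace would be a possible refinement; a minimal, untested sketch of that alternative:

    bool ScalarEvolution::
    isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
      assert(LI.getLoopFor(BB) && "must be in a loop for invalidation!");
      // On a miss, default-construct the entry (false) and fill it in. The
      // ValueTracking call does not mutate the map, so the iterator stays
      // valid.
      auto Res = BlockTransferExecutionToSuccessorCache.try_emplace(BB, false);
      if (Res.second)
        Res.first->second = llvm::isGuaranteedToTransferExecutionToSuccessor(BB);
      return Res.first->second;
    }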
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6631,24 +6631,102 @@
   return Bound ? Bound : &*F.getEntryBlock().begin();
 }
 
+bool ScalarEvolution::
+isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
+  assert(LI.getLoopFor(BB) && "must be in a loop for invalidation!");
+  if (!BlockTransferExecutionToSuccessorCache.count(BB))
+    BlockTransferExecutionToSuccessorCache[BB] =
+        llvm::isGuaranteedToTransferExecutionToSuccessor(BB);
+
+  return BlockTransferExecutionToSuccessorCache[BB];
+}
+
 bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
                                                         const Instruction *B) {
   if (A->getParent() == B->getParent() &&
-      isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
-                                                 B->getIterator()))
+      llvm::isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
+                                                       B->getIterator()))
     return true;
 
-  auto *BLoop = LI.getLoopFor(B->getParent());
-  if (BLoop && BLoop->getHeader() == B->getParent() &&
-      BLoop->getLoopPreheader() == A->getParent() &&
-      isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
-                                                 A->getParent()->end()) &&
-      isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(),
-                                                 B->getIterator()))
-    return true;
-  return false;
-}
+  // The current implementation only handles the case where A dominates B.
+  // Note that cases where A does not dominate B, but where A must reach B, do
+  // exist. An example would be B in the header of some loop L, and A in the
+  // (unconditional) latch block of L.
+  if (!DT.dominates(A->getParent(), B->getParent()))
+    return false;
+
+  // Unreachable CFGs can get very weird. To avoid infinite loops below,
+  // require reachability.
+  if (!DT.isReachableFromEntry(B->getParent()))
+    return false;
+
+  // First find a path from B to A such that, if all blocks along the path
+  // are transparent, we can prove that A reaches B. Defer the actual
+  // transparency checks until the end, as (even cached) they're expensive.
+  SmallVector<const BasicBlock *, 16> Path;
+  Path.push_back(B->getParent());
+
+  // Step backwards: from a loop header to its preheader, and otherwise
+  // through unique predecessor/unique successor pairs.
+  auto getPrevBB = [&](const BasicBlock *BB) -> const BasicBlock * {
+    auto *L = LI.getLoopFor(BB);
+    if (L && L->getHeader() == BB)
+      return L->getLoopPreheader();
+
+    auto *PrevBB = BB->getUniquePredecessor();
+    return PrevBB && PrevBB->getUniqueSuccessor() ? PrevBB : nullptr;
+  };
+
+  auto *PrevBB = getPrevBB(B->getParent());
+  if (!PrevBB)
+    return false;
+
+  while (true) {
+    Path.push_back(PrevBB);
+    if (PrevBB == A->getParent())
+      break;
+    PrevBB = getPrevBB(PrevBB);
+    if (!PrevBB)
+      return false;
+  }
+  assert(Path.front() == B->getParent());
+  assert(Path.back() == A->getParent());
+  assert(Path.size() >= 2);
+
+  // We rely on forgetLoop for invalidation of the cache; as a result, we
+  // can only query blocks inside loops. This restriction can be removed once
+  // we find a better cache update mechanism.
+  for (unsigned i = 1; i < Path.size() - 1; i++)
+    if (!LI.getLoopFor(Path[i]))
+      return false;
+
+  // Do the cacheable part first.
+  for (unsigned i = 1; i < Path.size() - 1; i++)
+    if (!isGuaranteedToTransferExecutionToSuccessor(Path[i]))
+      return false;
+
+  // Finally, check the prefix of B's block and the suffix of A's. For the
+  // local search, we use the block-level cache as a filter.
+  auto doLocalSearch = [&](BasicBlock::const_iterator Begin,
+                           BasicBlock::const_iterator End) {
+    if (Begin == End)
+      return true;
+
+    auto *BB = Begin->getParent();
+
+    // If the whole block is known transparent, any sub-range of it is too.
+    // (Knowing the block *isn't* transparent wouldn't be enough to answer
+    // this query; we'd need to know where in the block the blockage is.)
+    if (BlockTransferExecutionToSuccessorCache.count(BB) &&
+        BlockTransferExecutionToSuccessorCache[BB])
+      return true;
+
+    // It's tempting to cache this result, but since we're using a bounded
+    // search here, we'd risk saving false positives into the block cache.
+    return ::isGuaranteedToTransferExecutionToSuccessor(Begin, End);
+  };
+
+  return (doLocalSearch(B->getParent()->begin(), B->getIterator()) &&
+          doLocalSearch(A->getIterator(), A->getParent()->end()));
+}
 
 bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
   // Only proceed if we can prove that I does not yield poison.
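To make the backward walk concrete, consider the exact shape the deleted special case handled: A in a loop preheader and B in the header of that loop. getPrevBB maps the header straight to its preheader via the loop-header rule, so the search stops immediately (hypothetical trace, names P/H/L assumed):

    // A in preheader P, B in header H of loop L:
    //   getPrevBB(H) == P   (loop-header rule)
    //   Path == [ H, P ]    (no interior blocks)

With no interior blocks to check for transparency, the query reduces to the two doLocalSearch calls over the prefix of B's block and the suffix of A's, which is precisely what the removed code tested.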
@@ -6755,15 +6833,22 @@
   LoopProperties LP = {/* HasNoAbnormalExits */ true,
                        /*HasNoSideEffects*/ true};
 
-  for (auto *BB : L->getBlocks())
+  for (auto *BB : L->getBlocks()) {
+    LoopProperties Local = {true, true};
     for (auto &I : *BB) {
-      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
-        LP.HasNoAbnormalExits = false;
+      if (!llvm::isGuaranteedToTransferExecutionToSuccessor(&I))
+        Local.HasNoAbnormalExits = false;
       if (HasSideEffects(&I))
-        LP.HasNoSideEffects = false;
-      if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
+        Local.HasNoSideEffects = false;
+      if (!Local.HasNoAbnormalExits && !Local.HasNoSideEffects)
         break; // We're already as pessimistic as we can get.
     }
+    // Cache per-block transparency while we're at it.
+    BlockTransferExecutionToSuccessorCache[BB] = Local.HasNoAbnormalExits;
+    LP.HasNoAbnormalExits &= Local.HasNoAbnormalExits;
+    LP.HasNoSideEffects &= Local.HasNoSideEffects;
+    if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
+      break; // We're already as pessimistic as we can get.
+  }
 
   auto InsertPair = LoopPropertiesCache.insert({L, LP});
   assert(InsertPair.second && "We just checked!");
@@ -7506,6 +7591,7 @@
   // result.
   BackedgeTakenCounts.clear();
   PredicatedBackedgeTakenCounts.clear();
+  BlockTransferExecutionToSuccessorCache.clear();
   LoopPropertiesCache.clear();
   ConstantEvolutionLoopExitValue.clear();
   ValueExprMap.clear();
@@ -7570,6 +7656,13 @@
       PushDefUseChildren(I, Worklist);
     }
 
+    // Removing the blocks is probably overly conservative here, but it's not
+    // clear what client code might be assuming forgetLoop handles the
+    // invalidation required after code motion.
+    if (auto *BB = CurrL->getLoopPreheader())
+      BlockTransferExecutionToSuccessorCache.erase(BB);
+    for (auto *BB : CurrL->getBlocks())
+      BlockTransferExecutionToSuccessorCache.erase(BB);
     LoopPropertiesCache.erase(CurrL);
     // Forget all contained loops too, to avoid dangling entries in the
     // ValuesAtScopes map.
@@ -12383,6 +12476,7 @@
           std::move(Arg.ConstantEvolutionLoopExitValue)),
       ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
       LoopDispositions(std::move(Arg.LoopDispositions)),
+      BlockTransferExecutionToSuccessorCache(
+          std::move(Arg.BlockTransferExecutionToSuccessorCache)),
      LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
      BlockDispositions(std::move(Arg.BlockDispositions)),
      UnsignedRanges(std::move(Arg.UnsignedRanges)),
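The test updates below reflect the stronger reachability walk: isSCEVExprNeverPoison can now justify transferring no-wrap facts from an instruction to expressions defined in dominating blocks, so, for example, a GEP over a sign-extended induction variable becomes a plain add recurrence. A minimal way to observe the difference through the C++ API (hypothetical checker fragment; isAddRecIn is not part of this patch):

    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/ScalarEvolutionExpressions.h"
    using namespace llvm;

    // Returns true if SE models Ptr as an add recurrence {base,+,step} in L,
    // as in the updated flags-from-poison.ll expectation below.
    static bool isAddRecIn(ScalarEvolution &SE, Value *Ptr, const Loop *L) {
      const auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
      return AR && AR->getLoop() == L;
    }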
Index: llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
===================================================================
--- llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -11,7 +11,7 @@
 ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
 ; CHECK: Base offset: %A
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>]
+; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>]
 
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
 entry:
Index: llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
===================================================================
--- llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -11,7 +11,7 @@
 ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
 ; CHECK: Base offset: %A
 ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>]
+; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>]
 
 define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
 entry:
Index: llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
+++ llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -451,9 +451,9 @@
 ; CHECK-NEXT:    %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
 ; CHECK-NEXT:    --> {0,+,1}<%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + %numIterations) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %index32 = add nsw i32 %i, %offset
-; CHECK-NEXT:    --> {%offset,+,1}<%loop> U: full-set S: full-set Exits: (-1 + %offset + %numIterations) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%offset,+,1}<nsw><%loop> U: full-set S: full-set Exits: (-1 + %offset + %numIterations) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %ptr = getelementptr inbounds float, float* %input, i32 %index32
-; CHECK-NEXT:    --> ((4 * (sext i32 {%offset,+,1}<%loop> to i64)) + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-1 + %offset + %numIterations) to i64)) + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {((4 * (sext i32 %offset to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + %numIterations) to i64)) + (4 * (sext i32 %offset to i64)) + %input) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %nexti = add nsw i32 %i, 1
 ; CHECK-NEXT:    --> {1,+,1}<%loop> U: [1,-2147483648) S: [1,-2147483648) Exits: %numIterations LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test-add-not-header
Index: llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
+++ llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
@@ -21,7 +21,7 @@
 ; CHECK-NEXT:    %idxprom20 = zext i32 %storemerge1921 to i64
 ; CHECK-NEXT:    --> {3,+,4294967295}<%for.cond6> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20
-; CHECK-NEXT:    --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant }
+; CHECK-NEXT:    --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %i = load i16, i16* %arrayidx7, align 2
 ; CHECK-NEXT:    --> %i U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant }
 ; CHECK-NEXT:    %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ]
@@ -45,7 +45,7 @@
 ; CHECK-NEXT:    %idxprom20.3 = zext i32 %storemerge1921.3 to i64
 ; CHECK-NEXT:    --> {3,+,4294967295}<%inner.loop> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20.3
-; CHECK-NEXT:    --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
+; CHECK-NEXT:    --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %i7 = load i16, i16* %arrayidx7.3, align 2
 ; CHECK-NEXT:    --> %i7 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant }
 ; CHECK-NEXT:    %i8 = load volatile i32, i32* @b, align 4
Index: llvm/test/Analysis/ScalarEvolution/nsw.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/nsw.ll
+++ llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -25,7 +25,7 @@
 ; CHECK-NEXT:    %phitmp = sext i32 %tmp8 to i64
 ; CHECK-NEXT:    --> {1,+,1}<%bb> U: [1,-9223372036854775808) S: [1,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
 ; CHECK-NEXT:    %tmp9 = getelementptr inbounds double, double* %p, i64 %phitmp
-; CHECK-NEXT:    --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
+; CHECK-NEXT:    --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test1
 ; CHECK-NEXT:  Loop %bb: Unpredictable backedge-taken count.
 ; CHECK-NEXT:  Loop %bb: Unpredictable max backedge-taken count.