Summary: let the new-pass-manager SimpleLoopUnswitch skip divergent branch
candidates on targets with branch divergence. The function-to-loop adaptor
computes DivergenceAnalysis up front; the loop pass then reads the cached
(possibly stale) result through the new OuterAnalysisManagerProxy accessor
getStaleResult(). The legacy pass passes a null StaleDA.

diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -1111,6 +1111,13 @@
       return Res;
     }
 
+    /// Get a cached analysis. This may be stale due to inner
+    /// transforms, but the caller is okay with that.
+    template <typename PassT, typename IRUnitTParam>
+    typename PassT::Result *getStaleResult(IRUnitTParam &IR) const {
+      return OuterAM->template getCachedResult<PassT>(IR);
+    }
+
     /// Method provided for unit testing, not intended for general use.
     template <typename PassT, typename IRUnitTParam>
     bool cachedResultExists(IRUnitTParam &IR) const {
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -10,6 +10,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -206,6 +207,13 @@
                                      AM.getResult<TargetIRAnalysis>(F),
                                      BFI,
                                      MSSA};
+  // Divergence analysis is neither invalidated nor preserved by loop
+  // transforms. But it can be useful even when stale in some loop
+  // transforms. We make sure it is computed before starting the loop
+  // passes. For example, see the use of getStaleResult() in
+  // SimpleLoopUnswitch.
+  if (LAR.TTI.hasBranchDivergence())
+    (void)AM.getResult<DivergenceAnalysis>(F);
 
   // Setup the loop analysis manager from its proxy. It is important that
   // this is only done when there are loops to process and we have built the
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
@@ -2600,12 +2601,13 @@
 static bool
 unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
                       AssumptionCache &AC, TargetTransformInfo &TTI,
+                      const DivergenceInfo *StaleDA,
                       function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
                       ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   // Collect all invariant conditions within this loop (as opposed to an inner
   // loop which would be handled when visiting that inner loop).
-  SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
-      UnswitchCandidates;
+  using CandidateType = std::pair<Instruction *, TinyPtrVector<Value *>>;
+  SmallVector<CandidateType, 4> UnswitchCandidates;
 
   // Whether or not we should also collect guards in the loop.
   bool CollectGuards = false;
@@ -2661,6 +2663,17 @@
     UnswitchCandidates.push_back({BI, std::move(Invariants)});
   }
 
+  if (TTI.hasBranchDivergence()) {
+    llvm::erase_if(UnswitchCandidates, [&](const CandidateType &C) {
+      if (!StaleDA || StaleDA->isDivergent(*C.first)) {
+        LLVM_DEBUG(dbgs() << " Skip divergent candidate: " << *C.first
+                          << "\n");
+        return true;
+      }
+      return false;
+    });
+  }
+
   // If we didn't find any candidates, we're done.
   if (UnswitchCandidates.empty())
     return false;
@@ -2870,6 +2883,12 @@
 /// updated based on the unswitch.
 /// The `MSSA` analysis is also updated if valid (i.e. its use is enabled).
 ///
+/// The `StaleDA` analysis is useful for skipping divergent branches
+/// if it is available: unswitching such a branch is expensive on
+/// targets that have divergence. The analysis is stale since other
+/// loop transforms neither preserve nor update it, but it is safe for
+/// skipping branches.
+///
 /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
 /// true, we will attempt to do non-trivial unswitching as well as trivial
 /// unswitching.
@@ -2882,7 +2901,7 @@
 /// done.
 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
                          AssumptionCache &AC, TargetTransformInfo &TTI,
-                         bool NonTrivial,
+                         const DivergenceInfo *StaleDA, bool NonTrivial,
                          function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
                          ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
   assert(L.isRecursivelyLCSSAForm(DT, LI) &&
@@ -2922,7 +2941,7 @@
 
   // Try to unswitch the best invariant condition. We prefer this full unswitch to
   // a partial unswitch when possible below the threshold.
-  if (unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB, SE, MSSAU))
+  if (unswitchBestCondition(L, DT, LI, AC, TTI, StaleDA, UnswitchCB, SE, MSSAU))
     return true;
 
   // No other opportunities to unswitch.
@@ -2962,8 +2981,17 @@
     if (VerifyMemorySSA)
       AR.MSSA->verifyMemorySSA();
   }
-  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, NonTrivial, UnswitchCB,
-                    &AR.SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+
+  const DivergenceInfo *StaleDA = nullptr;
+  if (AR.TTI.hasBranchDivergence()) {
+    auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR);
+    StaleDA = FAM.getStaleResult<DivergenceAnalysis>(F);
+    assert(StaleDA && "DivergenceAnalysis not cached before loop passes");
+  }
+
+  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.TTI, StaleDA, NonTrivial,
+                    UnswitchCB, &AR.SE,
+                    MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
     return PreservedAnalyses::all();
 
   if (AR.MSSA && VerifyMemorySSA)
@@ -3049,7 +3077,8 @@
   if (MSSA && VerifyMemorySSA)
     MSSA->verifyMemorySSA();
 
-  bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, NonTrivial, UnswitchCB, SE,
+  bool Changed = unswitchLoop(*L, DT, LI, AC, TTI, /* StaleDA = */ nullptr,
+                              NonTrivial, UnswitchCB, SE,
                               MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
 
   if (MSSA && VerifyMemorySSA)
diff --git a/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
--- a/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
+++ b/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll
@@ -1,7 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -O3 -S -enable-new-pm=0 %s | FileCheck %s
-
-; This fails with the new pass manager:
-; https://bugs.llvm.org/show_bug.cgi?id=48819
+; RUN: opt -mtriple=amdgcn-- -O3 -S %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -simple-loop-unswitch -enable-nontrivial-unswitch -simplifycfg -loop-deletion -simplifycfg -S %s | FileCheck %s
 
 ; Check that loop unswitch happened and condition hoisted out of the loop.
 ; Condition is uniform so all targets should perform unswitching.