diff --git a/llvm/docs/ConvergenceAndUniformity.rst b/llvm/docs/ConvergenceAndUniformity.rst --- a/llvm/docs/ConvergenceAndUniformity.rst +++ b/llvm/docs/ConvergenceAndUniformity.rst @@ -51,7 +51,7 @@ This document presents a definition of convergence that is reasonable for real targets and is compatible with the currently implicit semantics of convergent operations in LLVM IR. This is accompanied by -a *uniformity analysis* that extends the existing divergence analysis +a *uniformity analysis* that extends previous work on divergence analysis [DivergenceSPMD]_ to cover irreducible control-flow. .. [DivergenceSPMD] Julian Rosemann, Simon Moll, and Sebastian diff --git a/llvm/include/llvm/ADT/GenericUniformityInfo.h b/llvm/include/llvm/ADT/GenericUniformityInfo.h --- a/llvm/include/llvm/ADT/GenericUniformityInfo.h +++ b/llvm/include/llvm/ADT/GenericUniformityInfo.h @@ -9,11 +9,7 @@ #ifndef LLVM_ADT_GENERICUNIFORMITYINFO_H #define LLVM_ADT_GENERICUNIFORMITYINFO_H -// #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/GenericCycleInfo.h" -// #include "llvm/ADT/SmallPtrSet.h" -// #include "llvm/ADT/Uniformity.h" -// #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h deleted file mode 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ /dev/null @@ -1,210 +0,0 @@ -//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// \file -// The divergence analysis determines which instructions and branches are -// divergent given a set of divergent source instructions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_DIVERGENCEANALYSIS_H -#define LLVM_ANALYSIS_DIVERGENCEANALYSIS_H - -#include "llvm/ADT/DenseSet.h" -#include "llvm/Analysis/SyncDependenceAnalysis.h" -#include "llvm/IR/PassManager.h" -#include - -namespace llvm { -class Function; -class Instruction; -class Loop; -class raw_ostream; -class TargetTransformInfo; -class Value; - -/// \brief Generic divergence analysis for reducible CFGs. -/// -/// This analysis propagates divergence in a data-parallel context from sources -/// of divergence to all users. It requires reducible CFGs. All assignments -/// should be in SSA form. -class DivergenceAnalysisImpl { -public: - /// \brief This instance will analyze the whole function \p F or the loop \p - /// RegionLoop. - /// - /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop. - /// Otherwise the whole function is analyzed. - /// \param IsLCSSAForm whether the analysis may assume that the IR in the - /// region in LCSSA form. - DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, - const DominatorTree &DT, const LoopInfo &LI, - SyncDependenceAnalysis &SDA, bool IsLCSSAForm); - - /// \brief The loop that defines the analyzed region (if any). - const Loop *getRegionLoop() const { return RegionLoop; } - const Function &getFunction() const { return F; } - - /// \brief Whether \p BB is part of the region. - bool inRegion(const BasicBlock &BB) const; - /// \brief Whether \p I is part of the region. - bool inRegion(const Instruction &I) const; - - /// \brief Mark \p UniVal as a value that is always uniform. - void addUniformOverride(const Value &UniVal); - - /// \brief Mark \p DivVal as a value that is always divergent. Will not do so - /// if `isAlwaysUniform(DivVal)`. - /// \returns Whether the tracked divergence state of \p DivVal changed. - bool markDivergent(const Value &DivVal); - - /// \brief Propagate divergence to all instructions in the region. - /// Divergence is seeded by calls to \p markDivergent. - void compute(); - - /// \brief Whether any value was marked or analyzed to be divergent. - bool hasDetectedDivergence() const { return !DivergentValues.empty(); } - - /// \brief Whether \p Val will always return a uniform value regardless of its - /// operands - bool isAlwaysUniform(const Value &Val) const; - - /// \brief Whether \p Val is divergent at its definition. - bool isDivergent(const Value &Val) const; - - /// \brief Whether \p U is divergent. Uses of a uniform value can be - /// divergent. - bool isDivergentUse(const Use &U) const; - -private: - /// \brief Mark \p Term as divergent and push all Instructions that become - /// divergent as a result on the worklist. - void analyzeControlDivergence(const Instruction &Term); - /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on - /// the worklist. - void taintAndPushPhiNodes(const BasicBlock &JoinBlock); - - /// \brief Identify all Instructions that become divergent because \p DivExit - /// is a divergent loop exit of \p DivLoop. Mark those instructions as - /// divergent and push them on the worklist. - void propagateLoopExitDivergence(const BasicBlock &DivExit, - const Loop &DivLoop); - - /// \brief Internal implementation function for propagateLoopExitDivergence. - void analyzeLoopExitDivergence(const BasicBlock &DivExit, - const Loop &OuterDivLoop); - - /// \brief Mark all instruction as divergent that use a value defined in \p - /// OuterDivLoop. Push their users on the worklist. - void analyzeTemporalDivergence(const Instruction &I, - const Loop &OuterDivLoop); - - /// \brief Push all users of \p Val (in the region) to the worklist. - void pushUsers(const Value &I); - - /// \brief Whether \p Val is divergent when read in \p ObservingBlock. - bool isTemporalDivergent(const BasicBlock &ObservingBlock, - const Value &Val) const; - -private: - const Function &F; - // If regionLoop != nullptr, analysis is only performed within \p RegionLoop. - // Otherwise, analyze the whole function - const Loop *RegionLoop; - - const DominatorTree &DT; - const LoopInfo &LI; - - // Recognized divergent loops - DenseSet DivergentLoops; - - // The SDA links divergent branches to divergent control-flow joins. - SyncDependenceAnalysis &SDA; - - // Use simplified code path for LCSSA form. - bool IsLCSSAForm; - - // Set of known-uniform values. - DenseSet UniformOverrides; - - // Detected/marked divergent values. - DenseSet DivergentValues; - - // Internal worklist for divergence propagation. - std::vector Worklist; -}; - -class DivergenceInfo { - Function &F; - - // If the function contains an irreducible region the divergence - // analysis can run indefinitely. We set ContainsIrreducible and no - // analysis is actually performed on the function. All values in - // this function are conservatively reported as divergent instead. - bool ContainsIrreducible = false; - std::unique_ptr SDA; - std::unique_ptr DA; - -public: - DivergenceInfo(Function &F, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const TargetTransformInfo &TTI, bool KnownReducible); - - /// Whether any divergence was detected. - bool hasDivergence() const { - return ContainsIrreducible || DA->hasDetectedDivergence(); - } - - /// The GPU kernel this analysis result is for - const Function &getFunction() const { return F; } - - /// Whether \p V is divergent at its definition. - bool isDivergent(const Value &V) const { - return ContainsIrreducible || DA->isDivergent(V); - } - - /// Whether \p U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use &U) const { - return ContainsIrreducible || DA->isDivergentUse(U); - } - - /// Whether \p V is uniform/non-divergent. - bool isUniform(const Value &V) const { return !isDivergent(V); } - - /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be - /// divergent. - bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } -}; - -/// \brief Divergence analysis frontend for GPU kernels. -class DivergenceAnalysis : public AnalysisInfoMixin { - friend AnalysisInfoMixin; - - static AnalysisKey Key; - -public: - using Result = DivergenceInfo; - - /// Runs the divergence analysis on @F, a GPU kernel - Result run(Function &F, FunctionAnalysisManager &AM); -}; - -/// Printer pass to dump divergence analysis results. -struct DivergenceAnalysisPrinterPass - : public PassInfoMixin { - DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} - - PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); - -private: - raw_ostream &OS; -}; // class DivergenceAnalysisPrinterPass - -} // namespace llvm - -#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h deleted file mode 100644 --- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h +++ /dev/null @@ -1,103 +0,0 @@ -//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// The kernel divergence analysis is an LLVM pass which can be used to find out -// if a branch instruction in a GPU program (kernel) is divergent or not. It can help -// branch optimizations such as jump threading and loop unswitching to make -// better decisions. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H -#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H - -#include "llvm/ADT/DenseSet.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" -#include - -namespace llvm { -class DivergenceInfo; -class Function; -class Module; -class raw_ostream; -class TargetTransformInfo; -class Use; -class Value; - -class LegacyDivergenceAnalysisImpl { -public: - // Returns true if V is divergent at its definition. - bool isDivergent(const Value *V) const; - - // Returns true if U is divergent. Uses of a uniform value can be divergent. - bool isDivergentUse(const Use *U) const; - - // Returns true if V is uniform/non-divergent. - bool isUniform(const Value *V) const { return !isDivergent(V); } - - // Returns true if U is uniform/non-divergent. Uses of a uniform value can be - // divergent. - bool isUniformUse(const Use *U) const { return !isDivergentUse(U); } - - // Keep the analysis results uptodate by removing an erased value. - void removeValue(const Value *V) { DivergentValues.erase(V); } - - // Print all divergent branches in the function. - void print(raw_ostream &OS, const Module *) const; - - // Whether analysis should be performed by GPUDivergenceAnalysis. - bool shouldUseGPUDivergenceAnalysis(const Function &F, - const TargetTransformInfo &TTI, - const LoopInfo &LI); - - void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, const LoopInfo &LI); - -protected: - // (optional) handle to new DivergenceAnalysis - std::unique_ptr gpuDA; - - // Stores all divergent values. - DenseSet DivergentValues; - - // Stores divergent uses of possibly uniform values. - DenseSet DivergentUses; -}; - -class LegacyDivergenceAnalysis : public FunctionPass, - public LegacyDivergenceAnalysisImpl { -public: - static char ID; - - LegacyDivergenceAnalysis(); - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; -}; - -class LegacyDivergenceAnalysisPass - : public PassInfoMixin, - public LegacyDivergenceAnalysisImpl { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - -private: - // (optional) handle to new DivergenceAnalysis - std::unique_ptr gpuDA; - - // Stores all divergent values. - DenseSet DivergentValues; - - // Stores divergent uses of possibly uniform values. - DenseSet DivergentUses; -}; - -} // end namespace llvm - -#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h --- a/llvm/include/llvm/Analysis/Passes.h +++ b/llvm/include/llvm/Analysis/Passes.h @@ -46,13 +46,6 @@ // FunctionPass *createDelinearizationPass(); - //===--------------------------------------------------------------------===// - // - // createLegacyDivergenceAnalysisPass - This pass determines which branches in a GPU - // program are divergent. - // - FunctionPass *createLegacyDivergenceAnalysisPass(); - //===--------------------------------------------------------------------===// // // Minor pass prototypes, allowing us to expose them through bugpoint and diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h deleted file mode 100644 --- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h +++ /dev/null @@ -1,92 +0,0 @@ -//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// \file -// This file defines the SyncDependenceAnalysis class, which computes for -// every divergent branch the set of phi nodes that the branch will make -// divergent. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H -#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H - -#include "llvm/ADT/SmallPtrSet.h" -#include -#include -#include -#include - -namespace llvm { - -class BasicBlock; -class DominatorTree; -class Instruction; -class LoopInfo; -class PostDominatorTree; - -using ConstBlockSet = SmallPtrSet; -struct ControlDivergenceDesc { - // Join points of divergent disjoint paths. - ConstBlockSet JoinDivBlocks; - // Divergent loop exits - ConstBlockSet LoopDivBlocks; -}; - -struct ModifiedPO { - std::vector LoopPO; - std::unordered_map POIndex; - void appendBlock(const BasicBlock &BB) { - POIndex[&BB] = LoopPO.size(); - LoopPO.push_back(&BB); - } - unsigned getIndexOf(const BasicBlock &BB) const { - return POIndex.find(&BB)->second; - } - unsigned size() const { return LoopPO.size(); } - const BasicBlock *getBlockAt(unsigned Idx) const { return LoopPO[Idx]; } -}; - -/// \brief Relates points of divergent control to join points in -/// reducible CFGs. -/// -/// This analysis relates points of divergent control to points of converging -/// divergent control. The analysis requires all loops to be reducible. -class SyncDependenceAnalysis { -public: - ~SyncDependenceAnalysis(); - SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT, - const LoopInfo &LI); - - /// \brief Computes divergent join points and loop exits caused by branch - /// divergence in \p Term. - /// - /// The set of blocks which are reachable by disjoint paths from \p Term. - /// The set also contains loop exits if there two disjoint paths: - /// one from \p Term to the loop exit and another from \p Term to the loop - /// header. Those exit blocks are added to the returned set. - /// If L is the parent loop of \p Term and an exit of L is in the returned - /// set then L is a divergent loop. - const ControlDivergenceDesc &getJoinBlocks(const Instruction &Term); - -private: - static ControlDivergenceDesc EmptyDivergenceDesc; - - ModifiedPO LoopPO; - - const DominatorTree &DT; - const PostDominatorTree &PDT; - const LoopInfo &LI; - - std::map> - CachedControlDivDescs; -}; - -} // namespace llvm - -#endif // LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -404,16 +404,10 @@ /// branches. bool hasBranchDivergence() const; - /// Return true if the target prefers to use GPU divergence analysis to - /// replace the legacy version. - bool useGPUDivergenceAnalysis() const; - /// Returns whether V is a source of divergence. /// /// This function provides the target-dependent information for - /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis - /// first builds the dependency graph, and then runs the reachability - /// algorithm starting with the sources of divergence. + /// the target-independent UniformityAnalysis. bool isSourceOfDivergence(const Value *V) const; // Returns true for the target specific @@ -1687,7 +1681,6 @@ TargetCostKind CostKind) = 0; virtual BranchProbability getPredictableBranchThreshold() = 0; virtual bool hasBranchDivergence() = 0; - virtual bool useGPUDivergenceAnalysis() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isAlwaysUniform(const Value *V) = 0; virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; @@ -2057,9 +2050,6 @@ return Impl.getPredictableBranchThreshold(); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } - bool useGPUDivergenceAnalysis() override { - return Impl.useGPUDivergenceAnalysis(); - } bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -89,8 +89,6 @@ bool hasBranchDivergence() const { return false; } - bool useGPUDivergenceAnalysis() const { return false; } - bool isSourceOfDivergence(const Value *V) const { return false; } bool isAlwaysUniform(const Value *V) const { return false; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -278,8 +278,6 @@ bool hasBranchDivergence() { return false; } - bool useGPUDivergenceAnalysis() { return false; } - bool isSourceOfDivergence(const Value *V) { return false; } bool isAlwaysUniform(const Value *V) { return false; } diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -175,7 +175,6 @@ void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&); void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLazyValueInfoWrapperPassPass(PassRegistry&); -void initializeLegacyDivergenceAnalysisPass(PassRegistry&); void initializeLegacyLICMPassPass(PassRegistry&); void initializeLegacyLoopSinkPassPass(PassRegistry&); void initializeLegalizerPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -94,7 +94,6 @@ (void) llvm::createInstructionCombiningPass(); (void) llvm::createJMCInstrumenterPass(); (void) llvm::createLCSSAPass(); - (void) llvm::createLegacyDivergenceAnalysisPass(); (void) llvm::createLICMPass(); (void) llvm::createLoopSinkPass(); (void) llvm::createLazyValueInfoPass(); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -53,7 +53,6 @@ initializeLazyBlockFrequencyInfoPassPass(Registry); initializeLazyValueInfoWrapperPassPass(Registry); initializeLazyValueInfoPrinterPass(Registry); - initializeLegacyDivergenceAnalysisPass(Registry); initializeLintLegacyPassPass(Registry); initializeLoopInfoWrapperPassPass(Registry); initializeMemDepPrinterPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -56,7 +56,6 @@ DependenceAnalysis.cpp DependenceGraphBuilder.cpp DevelopmentModeInlineAdvisor.cpp - DivergenceAnalysis.cpp DomPrinter.cpp DomTreeUpdater.cpp DominanceFrontier.cpp @@ -83,7 +82,6 @@ LazyBlockFrequencyInfo.cpp LazyCallGraph.cpp LazyValueInfo.cpp - LegacyDivergenceAnalysis.cpp Lint.cpp Loads.cpp Local.cpp @@ -128,7 +126,6 @@ ScalarEvolutionNormalization.cpp StackLifetime.cpp StackSafetyAnalysis.cpp - SyncDependenceAnalysis.cpp SyntheticCountsUtils.cpp TFLiteUtils.cpp TargetLibraryInfo.cpp diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp deleted file mode 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ /dev/null @@ -1,409 +0,0 @@ -//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a general divergence analysis for loop vectorization -// and GPU programs. It determines which branches and values in a loop or GPU -// program are divergent. It can help branch optimizations such as jump -// threading and loop unswitching to make better decisions. -// -// GPU programs typically use the SIMD execution model, where multiple threads -// in the same execution group have to execute in lock-step. Therefore, if the -// code contains divergent branches (i.e., threads in a group do not agree on -// which path of the branch to take), the group of threads has to execute all -// the paths from that branch with different subsets of threads enabled until -// they re-converge. -// -// Due to this execution model, some optimizations such as jump -// threading and loop unswitching can interfere with thread re-convergence. -// Therefore, an analysis that computes which branches in a GPU program are -// divergent can help the compiler to selectively run these optimizations. -// -// This implementation is derived from the Vectorization Analysis of the -// Region Vectorizer (RV). The analysis is based on the approach described in -// -// An abstract interpretation for SPMD divergence -// on reducible control flow graphs. -// Julian Rosemann, Simon Moll and Sebastian Hack -// POPL '21 -// -// This implementation is generic in the sense that it does -// not itself identify original sources of divergence. -// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and -// (DivergenceAnalysis) for functions, identify the sources of divergence -// (e.g., special variables that hold the thread ID or the iteration variable). -// -// The generic implementation propagates divergence to variables that are data -// or sync dependent on a source of divergence. -// -// While data dependency is a well-known concept, the notion of sync dependency -// is worth more explanation. Sync dependence characterizes the control flow -// aspect of the propagation of branch divergence. For example, -// -// %cond = icmp slt i32 %tid, 10 -// br i1 %cond, label %then, label %else -// then: -// br label %merge -// else: -// br label %merge -// merge: -// %a = phi i32 [ 0, %then ], [ 1, %else ] -// -// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid -// because %tid is not on its use-def chains, %a is sync dependent on %tid -// because the branch "br i1 %cond" depends on %tid and affects which value %a -// is assigned to. -// -// The sync dependence detection (which branch induces divergence in which join -// points) is implemented in the SyncDependenceAnalysis. -// -// The current implementation has the following limitations: -// 1. intra-procedural. It conservatively considers the arguments of a -// non-kernel-entry function and the return value of a function call as -// divergent. -// 2. memory as black box. It conservatively considers values loaded from -// generic or local address as divergent. This can be improved by leveraging -// pointer analysis and/or by modelling non-escaping memory objects in SSA -// as done in RV. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "divergence" - -DivergenceAnalysisImpl::DivergenceAnalysisImpl( - const Function &F, const Loop *RegionLoop, const DominatorTree &DT, - const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm) - : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA), - IsLCSSAForm(IsLCSSAForm) {} - -bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) { - if (isAlwaysUniform(DivVal)) - return false; - assert(isa(DivVal) || isa(DivVal)); - assert(!isAlwaysUniform(DivVal) && "cannot be a divergent"); - return DivergentValues.insert(&DivVal).second; -} - -void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) { - UniformOverrides.insert(&UniVal); -} - -bool DivergenceAnalysisImpl::isTemporalDivergent( - const BasicBlock &ObservingBlock, const Value &Val) const { - const auto *Inst = dyn_cast(&Val); - if (!Inst) - return false; - // check whether any divergent loop carrying Val terminates before control - // proceeds to ObservingBlock - for (const auto *Loop = LI.getLoopFor(Inst->getParent()); - Loop != RegionLoop && !Loop->contains(&ObservingBlock); - Loop = Loop->getParentLoop()) { - if (DivergentLoops.contains(Loop)) - return true; - } - - return false; -} - -bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const { - return I.getParent() && inRegion(*I.getParent()); -} - -bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const { - return RegionLoop ? RegionLoop->contains(&BB) : (BB.getParent() == &F); -} - -void DivergenceAnalysisImpl::pushUsers(const Value &V) { - const auto *I = dyn_cast(&V); - - if (I && I->isTerminator()) { - analyzeControlDivergence(*I); - return; - } - - for (const auto *User : V.users()) { - const auto *UserInst = dyn_cast(User); - if (!UserInst) - continue; - - // only compute divergent inside loop - if (!inRegion(*UserInst)) - continue; - - // All users of divergent values are immediate divergent - if (markDivergent(*UserInst)) - Worklist.push_back(UserInst); - } -} - -static const Instruction *getIfCarriedInstruction(const Use &U, - const Loop &DivLoop) { - const auto *I = dyn_cast(&U); - if (!I) - return nullptr; - if (!DivLoop.contains(I)) - return nullptr; - return I; -} - -void DivergenceAnalysisImpl::analyzeTemporalDivergence( - const Instruction &I, const Loop &OuterDivLoop) { - if (isAlwaysUniform(I)) - return; - if (isDivergent(I)) - return; - - LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n"); - assert((isa(I) || !IsLCSSAForm) && - "In LCSSA form all users of loop-exiting defs are Phi nodes."); - for (const Use &Op : I.operands()) { - const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop); - if (!OpInst) - continue; - if (markDivergent(I)) - pushUsers(I); - return; - } -} - -// marks all users of loop-carried values of the loop headed by LoopHeader as -// divergent -void DivergenceAnalysisImpl::analyzeLoopExitDivergence( - const BasicBlock &DivExit, const Loop &OuterDivLoop) { - // All users are in immediate exit blocks - if (IsLCSSAForm) { - for (const auto &Phi : DivExit.phis()) { - analyzeTemporalDivergence(Phi, OuterDivLoop); - } - return; - } - - // For non-LCSSA we have to follow all live out edges wherever they may lead. - const BasicBlock &LoopHeader = *OuterDivLoop.getHeader(); - SmallVector TaintStack; - TaintStack.push_back(&DivExit); - - // Otherwise potential users of loop-carried values could be anywhere in the - // dominance region of DivLoop (including its fringes for phi nodes) - DenseSet Visited; - Visited.insert(&DivExit); - - do { - auto *UserBlock = TaintStack.pop_back_val(); - - // don't spread divergence beyond the region - if (!inRegion(*UserBlock)) - continue; - - assert(!OuterDivLoop.contains(UserBlock) && - "irreducible control flow detected"); - - // phi nodes at the fringes of the dominance region - if (!DT.dominates(&LoopHeader, UserBlock)) { - // all PHI nodes of UserBlock become divergent - for (const auto &Phi : UserBlock->phis()) { - analyzeTemporalDivergence(Phi, OuterDivLoop); - } - continue; - } - - // Taint outside users of values carried by OuterDivLoop. - for (const auto &I : *UserBlock) { - analyzeTemporalDivergence(I, OuterDivLoop); - } - - // visit all blocks in the dominance region - for (const auto *SuccBlock : successors(UserBlock)) { - if (!Visited.insert(SuccBlock).second) { - continue; - } - TaintStack.push_back(SuccBlock); - } - } while (!TaintStack.empty()); -} - -void DivergenceAnalysisImpl::propagateLoopExitDivergence( - const BasicBlock &DivExit, const Loop &InnerDivLoop) { - LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n"); - - // Find outer-most loop that does not contain \p DivExit - const Loop *DivLoop = &InnerDivLoop; - const Loop *OuterDivLoop = DivLoop; - const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit); - const unsigned LoopExitDepth = - ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0; - while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) { - DivergentLoops.insert(DivLoop); // all crossed loops are divergent - OuterDivLoop = DivLoop; - DivLoop = DivLoop->getParentLoop(); - } - LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName() - << "\n"); - - analyzeLoopExitDivergence(DivExit, *OuterDivLoop); -} - -// this is a divergent join point - mark all phi nodes as divergent and push -// them onto the stack. -void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) { - LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName() - << "\n"); - - // ignore divergence outside the region - if (!inRegion(JoinBlock)) { - return; - } - - // push non-divergent phi nodes in JoinBlock to the worklist - for (const auto &Phi : JoinBlock.phis()) { - if (isDivergent(Phi)) - continue; - // FIXME Theoretically ,the 'undef' value could be replaced by any other - // value causing spurious divergence. - if (Phi.hasConstantOrUndefValue()) - continue; - if (markDivergent(Phi)) - Worklist.push_back(&Phi); - } -} - -void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) { - LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName() - << "\n"); - - // Don't propagate divergence from unreachable blocks. - if (!DT.isReachableFromEntry(Term.getParent())) - return; - - const auto *BranchLoop = LI.getLoopFor(Term.getParent()); - - const auto &DivDesc = SDA.getJoinBlocks(Term); - - // Iterate over all blocks now reachable by a disjoint path join - for (const auto *JoinBlock : DivDesc.JoinDivBlocks) { - taintAndPushPhiNodes(*JoinBlock); - } - - assert(DivDesc.LoopDivBlocks.empty() || BranchLoop); - for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) { - propagateLoopExitDivergence(*DivExitBlock, *BranchLoop); - } -} - -void DivergenceAnalysisImpl::compute() { - // Initialize worklist. - auto DivValuesCopy = DivergentValues; - for (const auto *DivVal : DivValuesCopy) { - assert(isDivergent(*DivVal) && "Worklist invariant violated!"); - pushUsers(*DivVal); - } - - // All values on the Worklist are divergent. - // Their users may not have been updated yed. - while (!Worklist.empty()) { - const Instruction &I = *Worklist.back(); - Worklist.pop_back(); - - // propagate value divergence to users - assert(isDivergent(I) && "Worklist invariant violated!"); - pushUsers(I); - } -} - -bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const { - return UniformOverrides.contains(&V); -} - -bool DivergenceAnalysisImpl::isDivergent(const Value &V) const { - return DivergentValues.contains(&V); -} - -bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const { - Value &V = *U.get(); - Instruction &I = *cast(U.getUser()); - return isDivergent(V) || isTemporalDivergent(*I.getParent(), V); -} - -DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const TargetTransformInfo &TTI, - bool KnownReducible) - : F(F) { - if (!KnownReducible) { - using RPOTraversal = ReversePostOrderTraversal; - RPOTraversal FuncRPOT(&F); - if (containsIrreducibleCFG(FuncRPOT, LI)) { - ContainsIrreducible = true; - return; - } - } - SDA = std::make_unique(DT, PDT, LI); - DA = std::make_unique(F, nullptr, DT, LI, *SDA, - /* LCSSA */ false); - for (auto &I : instructions(F)) { - if (TTI.isSourceOfDivergence(&I)) { - DA->markDivergent(I); - } else if (TTI.isAlwaysUniform(&I)) { - DA->addUniformOverride(I); - } - } - for (auto &Arg : F.args()) { - if (TTI.isSourceOfDivergence(&Arg)) { - DA->markDivergent(Arg); - } - } - - DA->compute(); -} - -AnalysisKey DivergenceAnalysis::Key; - -DivergenceAnalysis::Result -DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { - auto &DT = AM.getResult(F); - auto &PDT = AM.getResult(F); - auto &LI = AM.getResult(F); - auto &TTI = AM.getResult(F); - - return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false); -} - -PreservedAnalyses -DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { - auto &DI = FAM.getResult(F); - OS << "'Divergence Analysis' for function '" << F.getName() << "':\n"; - if (DI.hasDivergence()) { - for (auto &Arg : F.args()) { - OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " "); - OS << Arg << "\n"; - } - for (const BasicBlock &BB : F) { - OS << "\n " << BB.getName() << ":\n"; - for (const auto &I : BB.instructionsWithoutDebug()) { - OS << (DI.isDivergent(I) ? "DIVERGENT: " : " "); - OS << I << "\n"; - } - } - } - return PreservedAnalyses::all(); -} diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp deleted file mode 100644 --- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ /dev/null @@ -1,435 +0,0 @@ -//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis -//Implementation -==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements divergence analysis which determines whether a branch -// in a GPU program is divergent.It can help branch optimizations such as jump -// threading and loop unswitching to make better decisions. -// -// GPU programs typically use the SIMD execution model, where multiple threads -// in the same execution group have to execute in lock-step. Therefore, if the -// code contains divergent branches (i.e., threads in a group do not agree on -// which path of the branch to take), the group of threads has to execute all -// the paths from that branch with different subsets of threads enabled until -// they converge at the immediately post-dominating BB of the paths. -// -// Due to this execution model, some optimizations such as jump -// threading and loop unswitching can be unfortunately harmful when performed on -// divergent branches. Therefore, an analysis that computes which branches in a -// GPU program are divergent can help the compiler to selectively run these -// optimizations. -// -// This file defines divergence analysis which computes a conservative but -// non-trivial approximation of all divergent branches in a GPU program. It -// partially implements the approach described in -// -// Divergence Analysis -// Sampaio, Souza, Collange, Pereira -// TOPLAS '13 -// -// The divergence analysis identifies the sources of divergence (e.g., special -// variables that hold the thread ID), and recursively marks variables that are -// data or sync dependent on a source of divergence as divergent. -// -// While data dependency is a well-known concept, the notion of sync dependency -// is worth more explanation. Sync dependence characterizes the control flow -// aspect of the propagation of branch divergence. For example, -// -// %cond = icmp slt i32 %tid, 10 -// br i1 %cond, label %then, label %else -// then: -// br label %merge -// else: -// br label %merge -// merge: -// %a = phi i32 [ 0, %then ], [ 1, %else ] -// -// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid -// because %tid is not on its use-def chains, %a is sync dependent on %tid -// because the branch "br i1 %cond" depends on %tid and affects which value %a -// is assigned to. -// -// The current implementation has the following limitations: -// 1. intra-procedural. It conservatively considers the arguments of a -// non-kernel-entry function and the return value of a function call as -// divergent. -// 2. memory as black box. It conservatively considers values loaded from -// generic or local address as divergent. This can be improved by leveraging -// pointer analysis. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Value.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include -using namespace llvm; - -#define DEBUG_TYPE "divergence" - -// transparently use the GPUDivergenceAnalysis -static cl::opt UseGPUDA("use-gpu-divergence-analysis", cl::init(false), - cl::Hidden, - cl::desc("turn the LegacyDivergenceAnalysis into " - "a wrapper for GPUDivergenceAnalysis")); - -namespace { - -class DivergencePropagator { -public: - DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - PostDominatorTree &PDT, DenseSet &DV, - DenseSet &DU) - : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {} - void populateWithSourcesOfDivergence(); - void propagate(); - -private: - // A helper function that explores data dependents of V. - void exploreDataDependency(Value *V); - // A helper function that explores sync dependents of TI. - void exploreSyncDependency(Instruction *TI); - // Computes the influence region from Start to End. This region includes all - // basic blocks on any simple path from Start to End. - void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, - DenseSet &InfluenceRegion); - // Finds all users of I that are outside the influence region, and add these - // users to Worklist. - void findUsersOutsideInfluenceRegion( - Instruction &I, const DenseSet &InfluenceRegion); - - Function &F; - TargetTransformInfo &TTI; - DominatorTree &DT; - PostDominatorTree &PDT; - std::vector Worklist; // Stack for DFS. - DenseSet &DV; // Stores all divergent values. - DenseSet &DU; // Stores divergent uses of possibly uniform - // values. -}; - -void DivergencePropagator::populateWithSourcesOfDivergence() { - Worklist.clear(); - DV.clear(); - DU.clear(); - for (auto &I : instructions(F)) { - if (TTI.isSourceOfDivergence(&I)) { - Worklist.push_back(&I); - DV.insert(&I); - } - } - for (auto &Arg : F.args()) { - if (TTI.isSourceOfDivergence(&Arg)) { - Worklist.push_back(&Arg); - DV.insert(&Arg); - } - } -} - -void DivergencePropagator::exploreSyncDependency(Instruction *TI) { - // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's - // immediate post dominator are divergent. This rule handles if-then-else - // patterns. For example, - // - // if (tid < 5) - // a1 = 1; - // else - // a2 = 2; - // a = phi(a1, a2); // sync dependent on (tid < 5) - BasicBlock *ThisBB = TI->getParent(); - - // Unreachable blocks may not be in the dominator tree. - if (!DT.isReachableFromEntry(ThisBB)) - return; - - // If the function has no exit blocks or doesn't reach any exit blocks, the - // post dominator may be null. - DomTreeNode *ThisNode = PDT.getNode(ThisBB); - if (!ThisNode) - return; - - BasicBlock *IPostDom = ThisNode->getIDom()->getBlock(); - if (IPostDom == nullptr) - return; - - for (auto I = IPostDom->begin(); isa(I); ++I) { - // A PHINode is uniform if it returns the same value no matter which path is - // taken. - if (!cast(I)->hasConstantOrUndefValue() && DV.insert(&*I).second) - Worklist.push_back(&*I); - } - - // Propagation rule 2: if a value defined in a loop is used outside, the user - // is sync dependent on the condition of the loop exits that dominate the - // user. For example, - // - // int i = 0; - // do { - // i++; - // if (foo(i)) ... // uniform - // } while (i < tid); - // if (bar(i)) ... // divergent - // - // A program may contain unstructured loops. Therefore, we cannot leverage - // LoopInfo, which only recognizes natural loops. - // - // The algorithm used here handles both natural and unstructured loops. Given - // a branch TI, we first compute its influence region, the union of all simple - // paths from TI to its immediate post dominator (IPostDom). Then, we search - // for all the values defined in the influence region but used outside. All - // these users are sync dependent on TI. - DenseSet InfluenceRegion; - computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion); - // An insight that can speed up the search process is that all the in-region - // values that are used outside must dominate TI. Therefore, instead of - // searching every basic blocks in the influence region, we search all the - // dominators of TI until it is outside the influence region. - BasicBlock *InfluencedBB = ThisBB; - while (InfluenceRegion.count(InfluencedBB)) { - for (auto &I : *InfluencedBB) { - if (!DV.count(&I)) - findUsersOutsideInfluenceRegion(I, InfluenceRegion); - } - DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); - if (IDomNode == nullptr) - break; - InfluencedBB = IDomNode->getBlock(); - } -} - -void DivergencePropagator::findUsersOutsideInfluenceRegion( - Instruction &I, const DenseSet &InfluenceRegion) { - for (Use &Use : I.uses()) { - Instruction *UserInst = cast(Use.getUser()); - if (!InfluenceRegion.count(UserInst->getParent())) { - DU.insert(&Use); - if (DV.insert(UserInst).second) - Worklist.push_back(UserInst); - } - } -} - -// A helper function for computeInfluenceRegion that adds successors of "ThisBB" -// to the influence region. -static void -addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, - DenseSet &InfluenceRegion, - std::vector &InfluenceStack) { - for (BasicBlock *Succ : successors(ThisBB)) { - if (Succ != End && InfluenceRegion.insert(Succ).second) - InfluenceStack.push_back(Succ); - } -} - -void DivergencePropagator::computeInfluenceRegion( - BasicBlock *Start, BasicBlock *End, - DenseSet &InfluenceRegion) { - assert(PDT.properlyDominates(End, Start) && - "End does not properly dominate Start"); - - // The influence region starts from the end of "Start" to the beginning of - // "End". Therefore, "Start" should not be in the region unless "Start" is in - // a loop that doesn't contain "End". - std::vector InfluenceStack; - addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack); - while (!InfluenceStack.empty()) { - BasicBlock *BB = InfluenceStack.back(); - InfluenceStack.pop_back(); - addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack); - } -} - -void DivergencePropagator::exploreDataDependency(Value *V) { - // Follow def-use chains of V. - for (User *U : V->users()) { - if (!TTI.isAlwaysUniform(U) && DV.insert(U).second) - Worklist.push_back(U); - } -} - -void DivergencePropagator::propagate() { - // Traverse the dependency graph using DFS. - while (!Worklist.empty()) { - Value *V = Worklist.back(); - Worklist.pop_back(); - if (Instruction *I = dyn_cast(V)) { - // Terminators with less than two successors won't introduce sync - // dependency. Ignore them. - if (I->isTerminator() && I->getNumSuccessors() > 1) - exploreSyncDependency(I); - } - exploreDataDependency(V); - } -} - -} // namespace - -// Register this pass. -char LegacyDivergenceAnalysis::ID = 0; -LegacyDivergenceAnalysis::LegacyDivergenceAnalysis() : FunctionPass(ID) { - initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); -} -INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", - "Legacy Divergence Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence", - "Legacy Divergence Analysis", false, true) - -FunctionPass *llvm::createLegacyDivergenceAnalysisPass() { - return new LegacyDivergenceAnalysis(); -} - -bool LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis( - const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) { - if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis())) - return false; - - // GPUDivergenceAnalysis requires a reducible CFG. - using RPOTraversal = ReversePostOrderTraversal; - RPOTraversal FuncRPOT(&F); - return !containsIrreducibleCFG(FuncRPOT, LI); -} - -void LegacyDivergenceAnalysisImpl::run(Function &F, - llvm::TargetTransformInfo &TTI, - llvm::DominatorTree &DT, - llvm::PostDominatorTree &PDT, - const llvm::LoopInfo &LI) { - if (shouldUseGPUDivergenceAnalysis(F, TTI, LI)) { - // run the new GPU divergence analysis - gpuDA = std::make_unique(F, DT, PDT, LI, TTI, - /* KnownReducible = */ true); - - } else { - // run LLVM's existing DivergenceAnalysis - DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses); - DP.populateWithSourcesOfDivergence(); - DP.propagate(); - } -} - -bool LegacyDivergenceAnalysisImpl::isDivergent(const Value *V) const { - if (gpuDA) { - return gpuDA->isDivergent(*V); - } - return DivergentValues.count(V); -} - -bool LegacyDivergenceAnalysisImpl::isDivergentUse(const Use *U) const { - if (gpuDA) { - return gpuDA->isDivergentUse(*U); - } - return DivergentValues.count(U->get()) || DivergentUses.count(U); -} - -void LegacyDivergenceAnalysisImpl::print(raw_ostream &OS, - const Module *) const { - if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty()) - return; - - const Function *F = nullptr; - if (!DivergentValues.empty()) { - const Value *FirstDivergentValue = *DivergentValues.begin(); - if (const Argument *Arg = dyn_cast(FirstDivergentValue)) { - F = Arg->getParent(); - } else if (const Instruction *I = - dyn_cast(FirstDivergentValue)) { - F = I->getParent()->getParent(); - } else { - llvm_unreachable("Only arguments and instructions can be divergent"); - } - } else if (gpuDA) { - F = &gpuDA->getFunction(); - } - if (!F) - return; - - // Dumps all divergent values in F, arguments and then instructions. - for (const auto &Arg : F->args()) { - OS << (isDivergent(&Arg) ? "DIVERGENT: " : " "); - OS << Arg << "\n"; - } - // Iterate instructions using instructions() to ensure a deterministic order. - for (const BasicBlock &BB : *F) { - OS << "\n " << BB.getName() << ":\n"; - for (const auto &I : BB.instructionsWithoutDebug()) { - OS << (isDivergent(&I) ? "DIVERGENT: " : " "); - OS << I << "\n"; - } - } - OS << "\n"; -} - -void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); - AU.setPreservesAll(); -} - -bool LegacyDivergenceAnalysis::runOnFunction(Function &F) { - auto *TTIWP = getAnalysisIfAvailable(); - if (TTIWP == nullptr) - return false; - - TargetTransformInfo &TTI = TTIWP->getTTI(F); - // Fast path: if the target does not have branch divergence, we do not mark - // any branch as divergent. - if (!TTI.hasBranchDivergence()) - return false; - - DivergentValues.clear(); - DivergentUses.clear(); - gpuDA = nullptr; - - auto &DT = getAnalysis().getDomTree(); - auto &PDT = getAnalysis().getPostDomTree(); - auto &LI = getAnalysis().getLoopInfo(); - LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI); - LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName() - << ":\n"; - LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent())); - - return false; -} - -PreservedAnalyses -LegacyDivergenceAnalysisPass::run(Function &F, FunctionAnalysisManager &AM) { - auto &TTI = AM.getResult(F); - if (!TTI.hasBranchDivergence()) - return PreservedAnalyses::all(); - - DivergentValues.clear(); - DivergentUses.clear(); - gpuDA = nullptr; - - auto &DT = AM.getResult(F); - auto &PDT = AM.getResult(F); - auto &LI = AM.getResult(F); - LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI); - LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName() - << ":\n"; - LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent())); - return PreservedAnalyses::all(); -} diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp deleted file mode 100644 --- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ /dev/null @@ -1,478 +0,0 @@ -//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements an algorithm that returns for a divergent branch -// the set of basic blocks whose phi nodes become divergent due to divergent -// control. These are the blocks that are reachable by two disjoint paths from -// the branch or loop exits that have a reaching path that is disjoint from a -// path to the loop latch. -// -// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model -// control-induced divergence in phi nodes. -// -// -// -- Reference -- -// The algorithm is presented in Section 5 of -// -// An abstract interpretation for SPMD divergence -// on reducible control flow graphs. -// Julian Rosemann, Simon Moll and Sebastian Hack -// POPL '21 -// -// -// -- Sync dependence -- -// Sync dependence characterizes the control flow aspect of the -// propagation of branch divergence. For example, -// -// %cond = icmp slt i32 %tid, 10 -// br i1 %cond, label %then, label %else -// then: -// br label %merge -// else: -// br label %merge -// merge: -// %a = phi i32 [ 0, %then ], [ 1, %else ] -// -// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid -// because %tid is not on its use-def chains, %a is sync dependent on %tid -// because the branch "br i1 %cond" depends on %tid and affects which value %a -// is assigned to. -// -// -// -- Reduction to SSA construction -- -// There are two disjoint paths from A to X, if a certain variant of SSA -// construction places a phi node in X under the following set-up scheme. -// -// This variant of SSA construction ignores incoming undef values. -// That is paths from the entry without a definition do not result in -// phi nodes. -// -// entry -// / \ -// A \ -// / \ Y -// B C / -// \ / \ / -// D E -// \ / -// F -// -// Assume that A contains a divergent branch. We are interested -// in the set of all blocks where each block is reachable from A -// via two disjoint paths. This would be the set {D, F} in this -// case. -// To generally reduce this query to SSA construction we introduce -// a virtual variable x and assign to x different values in each -// successor block of A. -// -// entry -// / \ -// A \ -// / \ Y -// x = 0 x = 1 / -// \ / \ / -// D E -// \ / -// F -// -// Our flavor of SSA construction for x will construct the following -// -// entry -// / \ -// A \ -// / \ Y -// x0 = 0 x1 = 1 / -// \ / \ / -// x2 = phi E -// \ / -// x3 = phi -// -// The blocks D and F contain phi nodes and are thus each reachable -// by two disjoins paths from A. -// -// -- Remarks -- -// * In case of loop exits we need to check the disjoint path criterion for loops. -// To this end, we check whether the definition of x differs between the -// loop exit and the loop header (_after_ SSA construction). -// -// -- Known Limitations & Future Work -- -// * The algorithm requires reducible loops because the implementation -// implicitly performs a single iteration of the underlying data flow analysis. -// This was done for pragmatism, simplicity and speed. -// -// Relevant related work for extending the algorithm to irreducible control: -// A simple algorithm for global data flow analysis problems. -// Matthew S. Hecht and Jeffrey D. Ullman. -// SIAM Journal on Computing, 4(4):519–532, December 1975. -// -// * Another reason for requiring reducible loops is that points of -// synchronization in irreducible loops aren't 'obvious' - there is no unique -// header where threads 'should' synchronize when entering or coming back -// around from the latch. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/SyncDependenceAnalysis.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" - -#include - -#define DEBUG_TYPE "sync-dependence" - -// The SDA algorithm operates on a modified CFG - we modify the edges leaving -// loop headers as follows: -// -// * We remove all edges leaving all loop headers. -// * We add additional edges from the loop headers to their exit blocks. -// -// The modification is virtual, that is whenever we visit a loop header we -// pretend it had different successors. -namespace { -using namespace llvm; - -// Custom Post-Order Traveral -// -// We cannot use the vanilla (R)PO computation of LLVM because: -// * We (virtually) modify the CFG. -// * We want a loop-compact block enumeration, that is the numbers assigned to -// blocks of a loop form an interval -// -using POCB = std::function; -using VisitedSet = std::set; -using BlockStack = std::vector; - -// forward -static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, - VisitedSet &Finalized); - -// for a nested region (top-level loop or nested loop) -static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop, - POCB CallBack, VisitedSet &Finalized) { - const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr; - while (!Stack.empty()) { - const auto *NextBB = Stack.back(); - - auto *NestedLoop = LI.getLoopFor(NextBB); - bool IsNestedLoop = NestedLoop != Loop; - - // Treat the loop as a node - if (IsNestedLoop) { - SmallVector NestedExits; - NestedLoop->getUniqueExitBlocks(NestedExits); - bool PushedNodes = false; - for (const auto *NestedExitBB : NestedExits) { - if (NestedExitBB == LoopHeader) - continue; - if (Loop && !Loop->contains(NestedExitBB)) - continue; - if (Finalized.count(NestedExitBB)) - continue; - PushedNodes = true; - Stack.push_back(NestedExitBB); - } - if (!PushedNodes) { - // All loop exits finalized -> finish this node - Stack.pop_back(); - computeLoopPO(LI, *NestedLoop, CallBack, Finalized); - } - continue; - } - - // DAG-style - bool PushedNodes = false; - for (const auto *SuccBB : successors(NextBB)) { - if (SuccBB == LoopHeader) - continue; - if (Loop && !Loop->contains(SuccBB)) - continue; - if (Finalized.count(SuccBB)) - continue; - PushedNodes = true; - Stack.push_back(SuccBB); - } - if (!PushedNodes) { - // Never push nodes twice - Stack.pop_back(); - if (!Finalized.insert(NextBB).second) - continue; - CallBack(*NextBB); - } - } -} - -static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) { - VisitedSet Finalized; - BlockStack Stack; - Stack.reserve(24); // FIXME made-up number - Stack.push_back(&F.getEntryBlock()); - computeStackPO(Stack, LI, nullptr, CallBack, Finalized); -} - -static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack, - VisitedSet &Finalized) { - /// Call CallBack on all loop blocks. - std::vector Stack; - const auto *LoopHeader = Loop.getHeader(); - - // Visit the header last - Finalized.insert(LoopHeader); - CallBack(*LoopHeader); - - // Initialize with immediate successors - for (const auto *BB : successors(LoopHeader)) { - if (!Loop.contains(BB)) - continue; - if (BB == LoopHeader) - continue; - Stack.push_back(BB); - } - - // Compute PO inside region - computeStackPO(Stack, LI, &Loop, CallBack, Finalized); -} - -} // namespace - -namespace llvm { - -ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc; - -SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT, - const PostDominatorTree &PDT, - const LoopInfo &LI) - : DT(DT), PDT(PDT), LI(LI) { - computeTopLevelPO(*DT.getRoot()->getParent(), LI, - [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); }); -} - -SyncDependenceAnalysis::~SyncDependenceAnalysis() = default; - -namespace { -// divergence propagator for reducible CFGs -struct DivergencePropagator { - const ModifiedPO &LoopPOT; - const DominatorTree &DT; - const PostDominatorTree &PDT; - const LoopInfo &LI; - const BasicBlock &DivTermBlock; - - // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at - // block B - // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet - // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths - // from X or B is an immediate successor of X (initial value). - using BlockLabelVec = std::vector; - BlockLabelVec BlockLabels; - // divergent join and loop exit descriptor. - std::unique_ptr DivDesc; - - DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT, - const PostDominatorTree &PDT, const LoopInfo &LI, - const BasicBlock &DivTermBlock) - : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock), - BlockLabels(LoopPOT.size(), nullptr), - DivDesc(new ControlDivergenceDesc) {} - - void printDefs(raw_ostream &Out) { - Out << "Propagator::BlockLabels {\n"; - for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) { - const auto *Label = BlockLabels[BlockIdx]; - Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx - << ") : "; - if (!Label) { - Out << "\n"; - } else { - Out << Label->getName() << "\n"; - } - } - Out << "}\n"; - } - - // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this - // causes a divergent join. - bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) { - auto SuccIdx = LoopPOT.getIndexOf(SuccBlock); - - // unset or same reaching label - const auto *OldLabel = BlockLabels[SuccIdx]; - if (!OldLabel || (OldLabel == &PushedLabel)) { - BlockLabels[SuccIdx] = &PushedLabel; - return false; - } - - // Update the definition - BlockLabels[SuccIdx] = &SuccBlock; - return true; - } - - // visiting a virtual loop exit edge from the loop header --> temporal - // divergence on join - bool visitLoopExitEdge(const BasicBlock &ExitBlock, - const BasicBlock &DefBlock, bool FromParentLoop) { - // Pushing from a non-parent loop cannot cause temporal divergence. - if (!FromParentLoop) - return visitEdge(ExitBlock, DefBlock); - - if (!computeJoin(ExitBlock, DefBlock)) - return false; - - // Identified a divergent loop exit - DivDesc->LoopDivBlocks.insert(&ExitBlock); - LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName() - << "\n"); - return true; - } - - // process \p SuccBlock with reaching definition \p DefBlock - bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) { - if (!computeJoin(SuccBlock, DefBlock)) - return false; - - // Divergent, disjoint paths join. - DivDesc->JoinDivBlocks.insert(&SuccBlock); - LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName()); - return true; - } - - std::unique_ptr computeJoinPoints() { - assert(DivDesc); - - LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName() - << "\n"); - - const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock); - - // Early stopping criterion - int FloorIdx = LoopPOT.size() - 1; - const BasicBlock *FloorLabel = nullptr; - - // bootstrap with branch targets - int BlockIdx = 0; - - for (const auto *SuccBlock : successors(&DivTermBlock)) { - auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock); - BlockLabels[SuccIdx] = SuccBlock; - - // Find the successor with the highest index to start with - BlockIdx = std::max(BlockIdx, SuccIdx); - FloorIdx = std::min(FloorIdx, SuccIdx); - - // Identify immediate divergent loop exits - if (!DivBlockLoop) - continue; - - const auto *BlockLoop = LI.getLoopFor(SuccBlock); - if (BlockLoop && DivBlockLoop->contains(BlockLoop)) - continue; - DivDesc->LoopDivBlocks.insert(SuccBlock); - LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: " - << SuccBlock->getName() << "\n"); - } - - // propagate definitions at the immediate successors of the node in RPO - for (; BlockIdx >= FloorIdx; --BlockIdx) { - LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs())); - - // Any label available here - const auto *Label = BlockLabels[BlockIdx]; - if (!Label) - continue; - - // Ok. Get the block - const auto *Block = LoopPOT.getBlockAt(BlockIdx); - LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n"); - - auto *BlockLoop = LI.getLoopFor(Block); - bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block; - bool CausedJoin = false; - int LoweredFloorIdx = FloorIdx; - if (IsLoopHeader) { - // Disconnect from immediate successors and propagate directly to loop - // exits. - SmallVector BlockLoopExits; - BlockLoop->getExitBlocks(BlockLoopExits); - - bool IsParentLoop = BlockLoop->contains(&DivTermBlock); - for (const auto *BlockLoopExit : BlockLoopExits) { - CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop); - LoweredFloorIdx = std::min(LoweredFloorIdx, - LoopPOT.getIndexOf(*BlockLoopExit)); - } - } else { - // Acyclic successor case - for (const auto *SuccBlock : successors(Block)) { - CausedJoin |= visitEdge(*SuccBlock, *Label); - LoweredFloorIdx = - std::min(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock)); - } - } - - // Floor update - if (CausedJoin) { - // 1. Different labels pushed to successors - FloorIdx = LoweredFloorIdx; - } else if (FloorLabel != Label) { - // 2. No join caused BUT we pushed a label that is different than the - // last pushed label - FloorIdx = LoweredFloorIdx; - FloorLabel = Label; - } - } - - LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs())); - - return std::move(DivDesc); - } -}; -} // end anonymous namespace - -#ifndef NDEBUG -static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) { - Out << "["; - ListSeparator LS; - for (const auto *BB : Blocks) - Out << LS << BB->getName(); - Out << "]"; -} -#endif - -const ControlDivergenceDesc & -SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) { - // trivial case - if (Term.getNumSuccessors() <= 1) { - return EmptyDivergenceDesc; - } - - // already available in cache? - auto ItCached = CachedControlDivDescs.find(&Term); - if (ItCached != CachedControlDivDescs.end()) - return *ItCached->second; - - // compute all join points - // Special handling of divergent loop exits is not needed for LCSSA - const auto &TermBlock = *Term.getParent(); - DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock); - auto DivDesc = Propagator.computeJoinPoints(); - - LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n"; - dbgs() << "JoinDivBlocks: "; - printBlockSet(DivDesc->JoinDivBlocks, dbgs()); - dbgs() << "\nLoopDivBlocks: "; - printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";); - - auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc)); - assert(ItInserted.second); - return *ItInserted.first->second; -} - -} // namespace llvm diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -262,10 +262,6 @@ return TTIImpl->hasBranchDivergence(); } -bool TargetTransformInfo::useGPUDivergenceAnalysis() const { - return TTIImpl->useGPUDivergenceAnalysis(); -} - bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -34,7 +34,6 @@ #include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" @@ -46,7 +45,6 @@ #include "llvm/Analysis/InstCount.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/Lint.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -249,7 +249,6 @@ TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) -FUNCTION_ANALYSIS("divergence", DivergenceAnalysis()) FUNCTION_ANALYSIS("uniformity", UniformityInfoAnalysis()) #ifndef FUNCTION_ALIAS_ANALYSIS @@ -317,7 +316,6 @@ FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) FUNCTION_PASS("instnamer", InstructionNamerPass()) -FUNCTION_PASS("legacy-divergence-analysis", LegacyDivergenceAnalysisPass()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass()) @@ -357,7 +355,6 @@ FUNCTION_PASS("print", CostModelPrinterPass(dbgs())) FUNCTION_PASS("print", CycleInfoPrinterPass(dbgs())) FUNCTION_PASS("print", DependenceAnalysisPrinterPass(dbgs())) -FUNCTION_PASS("print", DivergenceAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", DelinearizationPrinterPass(dbgs())) diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll deleted file mode 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll +++ /dev/null @@ -1,142 +0,0 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx90a -passes='print' -disable-output %s 2>&1 | FileCheck %s - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec': -; CHECK-NOT: DIVERGENT -define i64 @read_register_exec() { - %reg = call i64 @llvm.read_register.i64(metadata !0) - ret i64 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_m0': -; CHECK-NOT: DIVERGENT -define i32 @read_register_m0() { - %reg = call i32 @llvm.read_register.i32(metadata !1) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17': -; CHECK-NOT: DIVERGENT -define i32 @read_register_s17() { - %reg = call i32 @llvm.read_register.i32(metadata !2) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17_i17': -; CHECK-NOT: DIVERGENT -define i17 @read_register_s17_i17() { - %reg = call i17 @llvm.read_register.i17(metadata !2) - ret i17 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0': -; CHECK: DIVERGENT -define i32 @read_register_v0() { - %reg = call i32 @llvm.read_register.i32(metadata !3) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0_v1': -; CHECK: DIVERGENT -define i64 @read_register_v0_v1() { - %reg = call i64 @llvm.read_register.i64(metadata !4) - ret i64 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0': -; CHECK: DIVERGENT -define i32 @read_register_a0() { - %reg = call i32 @llvm.read_register.i32(metadata !5) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0_a1': -; CHECK: DIVERGENT -define i64 @read_register_a0_a1() { - %reg = call i64 @llvm.read_register.i64(metadata !6) - ret i64 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i64': -; CHECK-NOT: DIVERGENT -define i64 @read_register_vcc_i64() { - %reg = call i64 @llvm.read_register.i64(metadata !7) - ret i64 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i1': -; CHECK: DIVERGENT -define i1 @read_register_vcc_i1() { - %reg = call i1 @llvm.read_register.i1(metadata !7) - ret i1 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_invalid_reg': -; CHECK-NOT: DIVERGENT -define i64 @read_register_invalid_reg() { - %reg = call i64 @llvm.read_register.i64(metadata !8) - ret i64 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_flat_scratch': -; CHECK-NOT: DIVERGENT -define i32 @read_register_flat_scratch() { - %reg = call i32 @llvm.read_register.i32(metadata !9) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_lo_i32': -; CHECK-NOT: DIVERGENT -define i32 @read_register_vcc_lo_i32() { - %reg = call i32 @llvm.read_register.i32(metadata !10) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_hi_i32': -; CHECK-NOT: DIVERGENT -define i32 @read_register_vcc_hi_i32() { - %reg = call i32 @llvm.read_register.i32(metadata !11) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_lo_i32': -; CHECK-NOT: DIVERGENT -define i32 @read_register_exec_lo_i32() { - %reg = call i32 @llvm.read_register.i32(metadata !12) - ret i32 %reg -} - -; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_hi_i32': -; CHECK-NOT: DIVERGENT -define i32 @read_register_exec_hi_i32() { - %reg = call i32 @llvm.read_register.i32(metadata !13) - ret i32 %reg -} - -; FIXME: Why does the verifier allow this? -; CHECK-LABEL: Divergence Analysis' for function 'read_register_empty_str_i32': -; CHECK-NOT: DIVERGENT -define i32 @read_register_empty_str_i32() { - %reg = call i32 @llvm.read_register.i32(metadata !14) - ret i32 %reg -} - -declare i64 @llvm.read_register.i64(metadata) -declare i32 @llvm.read_register.i32(metadata) -declare i17 @llvm.read_register.i17(metadata) -declare i1 @llvm.read_register.i1(metadata) - -!0 = !{!"exec"} -!1 = !{!"m0"} -!2 = !{!"s17"} -!3 = !{!"v0"} -!4 = !{!"v[0:1]"} -!5 = !{!"a0"} -!6 = !{!"a[0:1]"} -!7 = !{!"vcc"} -!8 = !{!"not a register"} -!9 = !{!"flat_scratch"} -!10 = !{!"vcc_lo"} -!11 = !{!"vcc_hi"} -!12 = !{!"exec_lo"} -!13 = !{!"exec_hi"} -!14 = !{!""} diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -passes='print' 2>&1 -disable-output %s | FileCheck %s - -; CHECK: DIVERGENT: %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst -define amdgpu_kernel void @test1(ptr %ptr, i32 %val) #0 { - %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst - store i32 %orig, ptr %ptr - ret void -} - -; CHECK: DIVERGENT: %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst -define amdgpu_kernel void @test2(ptr %ptr, i32 %cmp, i32 %new) { - %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst - %val = extractvalue { i32, i1 } %orig, 0 - store i32 %val, ptr %ptr - ret void -} - -; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false) -define i32 @test_atomic_inc_i32(ptr addrspace(1) %ptr, i32 %val) #0 { - %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false) - ret i32 %ret -} - -; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false) -define i64 @test_atomic_inc_i64(ptr addrspace(1) %ptr, i64 %val) #0 { - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false) - ret i64 %ret -} - -; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false) -define i32 @test_atomic_dec_i32(ptr addrspace(1) %ptr, i32 %val) #0 { - %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 %val, i32 0, i32 0, i1 false) - ret i32 %ret -} - -; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false) -define i64 @test_atomic_dec_i64(ptr addrspace(1) %ptr, i64 %val) #0 { - %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 %val, i32 0, i32 0, i1 false) - ret i64 %ret -} - -declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #1 -declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #1 -declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #1 -declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #1 - -; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val) -define amdgpu_kernel void @test_atomic_csub_i32(ptr addrspace(1) %ptr, i32 %val) #0 { - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val) - store i32 %ret, ptr addrspace(1) %ptr, align 4 - ret void -} - -declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1 - -attributes #0 = { nounwind } -attributes #1 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -passes='print' 2>&1 -disable-output %s | FileCheck %s - -; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 -define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 { - %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 - store i32 %swizzle, ptr addrspace(1) %out, align 4 - ret void -} - -declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1 - -attributes #0 = { nounwind convergent } -attributes #1 = { nounwind readnone convergent } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: opt %s -mtriple amdgcn-- -passes='print' 2>&1 -disable-output | FileCheck %s - -; CHECK-LABEL: function 'test_amdgpu_ps': -; CHECK: DIVERGENT: ptr addrspace(4) %arg0 -; CHECK-NOT: DIVERGENT -; CHECK: DIVERGENT: <2 x i32> %arg3 -; CHECK: DIVERGENT: <3 x i32> %arg4 -; CHECK: DIVERGENT: float %arg5 -; CHECK: DIVERGENT: i32 %arg6 - -define amdgpu_ps void @test_amdgpu_ps(ptr addrspace(4) byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { - ret void -} - -; CHECK-LABEL: function 'test_amdgpu_kernel': -; CHECK-NOT: %arg0 -; CHECK-NOT: %arg1 -; CHECK-NOT: %arg2 -; CHECK-NOT: %arg3 -; CHECK-NOT: %arg4 -; CHECK-NOT: %arg5 -; CHECK-NOT: %arg6 -define amdgpu_kernel void @test_amdgpu_kernel(ptr addrspace(4) byref([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { - ret void -} - -; CHECK-LABEL: function 'test_c': -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -; CHECK: DIVERGENT: -define void @test_c(ptr addrspace(4) byval([4 x <16 x i8>]) %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { - ret void -} - -attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'AMDGPU' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ /dev/null @@ -1,103 +0,0 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -passes='print' 2>&1 -disable-output %s | FileCheck %s - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( -define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32( -define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.add.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32( -define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32( -define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32( -define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32( -define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32( -define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32( -define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.and.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32( -define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.or.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32( -define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap( -define float @buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0 - -attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ /dev/null @@ -1,131 +0,0 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -passes='print' 2>&1 -disable-output %s | FileCheck %s - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( -define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32( -define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32( -define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32( -define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32( -define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32( -define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32( -define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32( -define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32( -define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32( -define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32( -define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32( -define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32( -define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32( -define float @image_atomic_add_2d(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 - -declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 - -attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -passes='print' 2>&1 -disable-output %s | FileCheck %s - -; Test that we consider loads from flat and private addrspaces to be divergent. - -; CHECK: DIVERGENT: %val = load i32, ptr %flat, align 4 -define amdgpu_kernel void @flat_load(ptr %flat) { - %val = load i32, ptr %flat, align 4 - ret void -} - -; CHECK: DIVERGENT: %val = load i32, ptr addrspace(5) %priv, align 4 -define amdgpu_kernel void @private_load(ptr addrspace(5) %priv) { - %val = load i32, ptr addrspace(5) %priv, align 4 - ret void -} diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: opt %s -mtriple amdgcn-- -passes='print' 2>&1 -disable-output | FileCheck %s - -; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2 -; CHECK: DIVERGENT: %tmp10 = load volatile float, ptr addrspace(1) %tmp5, align 4 -; CHECK: DIVERGENT: %tmp11 = load volatile float, ptr addrspace(1) %tmp5, align 4 - -; The post dominator tree does not have a root node in this case -define amdgpu_kernel void @no_return_blocks(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1) #0 { -bb0: - %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %tmp2 = sext i32 %tmp to i64 - %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2 - %tmp6 = load volatile float, ptr addrspace(1) %tmp5, align 4 - %tmp8 = fcmp olt float %tmp6, 0.000000e+00 - br i1 %tmp8, label %bb1, label %bb2 - -bb1: - %tmp10 = load volatile float, ptr addrspace(1) %tmp5, align 4 - br label %bb2 - -bb2: - %tmp11 = load volatile float, ptr addrspace(1) %tmp5, align 4 - br label %bb1 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.amdgcn.workitem.id.x() #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -passes='print' 2>&1 -disable-output %s | FileCheck %s - -; CHECK-LABEL: 'test1': -; CHECK-NEXT: DIVERGENT: i32 %bound -; CHECK: {{^ *}}%counter = -; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound -; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body -; CHECK: {{^ *}}%counter.next = -; CHECK: {{^ *}}%counter.footer = -; CHECK: DIVERGENT: br i1 %break, label %end, label %header -; Note: %counter is not divergent! -define amdgpu_ps void @test1(i32 %bound) { -entry: - br label %header - -header: - %counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ] - %break = icmp sge i32 %counter, %bound - br i1 %break, label %footer, label %body - -body: - %counter.next = add i32 %counter, 1 - br label %footer - -footer: - %counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ] - br i1 %break, label %end, label %header - -end: - ret void -} diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: opt %s -mtriple amdgcn-- -passes='print' 2>&1 -disable-output | FileCheck %s - -; CHECK: DIVERGENT: %tmp = cmpxchg volatile -define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { -entry: - unreachable - -unreachable_loop: ; preds = %do.body.i, %if.then11 - %tmp = cmpxchg volatile ptr addrspace(1) null, i32 0, i32 0 seq_cst seq_cst - %cmp.i = extractvalue { i32, i1 } %tmp, 1 - br i1 %cmp.i, label %unreachable_loop, label %end - -end: ; preds = %do.body.i51, %atomicAdd_g_f.exit - unreachable -} - -attributes #0 = { norecurse nounwind } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' 2>&1 -disable-output %s | FileCheck %s - -declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i32 @llvm.amdgcn.workitem.id.y() #0 -declare i32 @llvm.amdgcn.workitem.id.z() #0 -declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 -declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 - -; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @workitem_id_x() #1 { - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id.x, ptr addrspace(1) undef - ret void -} - -; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y() -define amdgpu_kernel void @workitem_id_y() #1 { - %id.y = call i32 @llvm.amdgcn.workitem.id.y() - store volatile i32 %id.y, ptr addrspace(1) undef - ret void -} - -; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z() -define amdgpu_kernel void @workitem_id_z() #1 { - %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.z, ptr addrspace(1) undef - ret void -} - -; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0) -define amdgpu_kernel void @mbcnt_lo() #1 { - %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0) - store volatile i32 %mbcnt.lo, ptr addrspace(1) undef - ret void -} - -; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0) -define amdgpu_kernel void @mbcnt_hi() #1 { - %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0) - store volatile i32 %mbcnt.hi, ptr addrspace(1) undef - ret void -} - -attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/diverge.ll +++ /dev/null @@ -1,219 +0,0 @@ -; RUN: opt %s -passes='print' 2>&1 -disable-output | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) -define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: function 'no_diverge' -entry: - %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - %cond = icmp slt i32 %n, 0 - br i1 %cond, label %then, label %else ; uniform -; CHECK-NOT: DIVERGENT: br i1 %cond, -then: - %a1 = add i32 %a, %tid - br label %merge -else: - %b2 = add i32 %b, %tid - br label %merge -merge: - %c = phi i32 [ %a1, %then ], [ %b2, %else ] - ret i32 %c -} - -; c = a; -; if (threadIdx.x < 5) // divergent: data dependent -; c = b; -; return c; // c is divergent: sync dependent -define i32 @sync(i32 %a, i32 %b) { -; CHECK-LABEL: function 'sync' -bb1: - %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - %cond = icmp slt i32 %tid, 5 - br i1 %cond, label %bb2, label %bb3 -; CHECK: DIVERGENT: br i1 %cond, -bb2: - br label %bb3 -bb3: - %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid -; CHECK: DIVERGENT: %c = - ret i32 %c -} - -; c = 0; -; if (threadIdx.x >= 5) { // divergent -; c = (n < 0 ? a : b); // c here is uniform because n is uniform -; } -; // c here is divergent because it is sync dependent on threadIdx.x >= 5 -; return c; -define i32 @mixed(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: function 'mixed' -bb1: - %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() - %cond = icmp slt i32 %tid, 5 - br i1 %cond, label %bb6, label %bb2 -; CHECK: DIVERGENT: br i1 %cond, -bb2: - %cond2 = icmp slt i32 %n, 0 - br i1 %cond2, label %bb4, label %bb3 -bb3: - br label %bb5 -bb4: - br label %bb5 -bb5: - %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ] -; CHECK-NOT: DIVERGENT: %c = - br label %bb6 -bb6: - %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ] -; CHECK: DIVERGENT: %c2 = - ret i32 %c2 -} - -; We conservatively treats all parameters of a __device__ function as divergent. -define i32 @device(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: function 'device' -; CHECK: DIVERGENT: i32 %n -; CHECK: DIVERGENT: i32 %a -; CHECK: DIVERGENT: i32 %b -entry: - %cond = icmp slt i32 %n, 0 - br i1 %cond, label %then, label %else -; CHECK: DIVERGENT: br i1 %cond, -then: - br label %merge -else: - br label %merge -merge: - %c = phi i32 [ %a, %then ], [ %b, %else ] - ret i32 %c -} - -; int i = 0; -; do { -; i++; // i here is uniform -; } while (i < laneid); -; return i == 10 ? 0 : 1; // i here is divergent -; -; The i defined in the loop is used outside. -define i32 @loop() { -; CHECK-LABEL: function 'loop' -entry: - %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() - br label %loop -loop: - %i = phi i32 [ 0, %entry ], [ %i1, %loop ] -; CHECK-NOT: DIVERGENT: %i = - %i1 = add i32 %i, 1 - %exit_cond = icmp sge i32 %i1, %laneid - br i1 %exit_cond, label %loop_exit, label %loop -loop_exit: - %cond = icmp eq i32 %i, 10 - br i1 %cond, label %then, label %else -; CHECK: DIVERGENT: br i1 %cond, -then: - ret i32 0 -else: - ret i32 1 -} - -; Same as @loop, but the loop is in the LCSSA form. -define i32 @lcssa() { -; CHECK-LABEL: function 'lcssa' -entry: - %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - br label %loop -loop: - %i = phi i32 [ 0, %entry ], [ %i1, %loop ] -; CHECK-NOT: DIVERGENT: %i = - %i1 = add i32 %i, 1 - %exit_cond = icmp sge i32 %i1, %tid - br i1 %exit_cond, label %loop_exit, label %loop -loop_exit: - %i.lcssa = phi i32 [ %i, %loop ] -; CHECK: DIVERGENT: %i.lcssa = - %cond = icmp eq i32 %i.lcssa, 10 - br i1 %cond, label %then, label %else -; CHECK: DIVERGENT: br i1 %cond, -then: - ret i32 0 -else: - ret i32 1 -} - -; This test contains an unstructured loop. -; +-------------- entry ----------------+ -; | | -; V V -; i1 = phi(0, i3) i2 = phi(0, i3) -; j1 = i1 + 1 ---> i3 = phi(j1, j2) <--- j2 = i2 + 2 -; ^ | ^ -; | V | -; +-------- switch (tid / i3) ----------+ -; | -; V -; if (i3 == 5) // divergent -; because sync dependent on (tid / i3). -define i32 @unstructured_loop(i1 %entry_cond) { -; CHECK-LABEL: function 'unstructured_loop' -entry: - %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2 -loop_entry_1: - %i1 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ] - %j1 = add i32 %i1, 1 - br label %loop_body -loop_entry_2: - %i2 = phi i32 [ 0, %entry ], [ %i3, %loop_latch ] - %j2 = add i32 %i2, 2 - br label %loop_body -loop_body: - %i3 = phi i32 [ %j1, %loop_entry_1 ], [ %j2, %loop_entry_2 ] - br label %loop_latch -loop_latch: - %div = sdiv i32 %tid, %i3 - switch i32 %div, label %branch [ i32 1, label %loop_entry_1 - i32 2, label %loop_entry_2 ] -branch: - %cmp = icmp eq i32 %i3, 5 - br i1 %cmp, label %then, label %else -; CHECK: DIVERGENT: br i1 %cmp, -then: - ret i32 0 -else: - ret i32 1 -} - -; Verifies sync-dependence is computed correctly in the absense of loops. -define i32 @sync_no_loop(i32 %arg) { -entry: - %0 = add i32 %arg, 1 - %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - %1 = icmp sge i32 %tid, 10 - br i1 %1, label %bb1, label %bb2 - -bb1: - br label %bb3 - -bb2: - br label %bb3 - -bb3: - %2 = add i32 %0, 2 - ; CHECK-NOT: DIVERGENT: %2 - ret i32 %2 -} - -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() -declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() -declare i32 @llvm.nvvm.read.ptx.sreg.laneid() - -!nvvm.annotations = !{!0, !1, !2, !3, !4, !5} -!0 = !{ptr @no_diverge, !"kernel", i32 1} -!1 = !{ptr @sync, !"kernel", i32 1} -!2 = !{ptr @mixed, !"kernel", i32 1} -!3 = !{ptr @loop, !"kernel", i32 1} -!4 = !{ptr @unstructured_loop, !"kernel", i32 1} -!5 = !{ptr @sync_no_loop, !"kernel", i32 1} diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg b/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg deleted file mode 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/NVPTX/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'NVPTX' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform-gmir.mir +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform-gmir.mir @@ -1,4 +1,4 @@ -# NOTE: This file is Generic MIR translation of test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll test file +# NOTE: This file is Generic MIR translation of test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll test file # RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s --- name: readfirstlane diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/always-uniform.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/atomics.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/hidden-diverge.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/branch-outside-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/diverged-entry-basic-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/exit-divergence-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-1.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/irreducible/irreducible-2-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/join-loopexit-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/loads-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/loads-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/never-uniform.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/never-uniform.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-diverge-gmir.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: for function 'readfirstlane': diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/atomics.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/b42473-r1-crash.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s declare i32 @gf2(i32) diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/control-flow-intrinsics.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ; Tests control flow intrinsics that should be treated as uniform diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_diverge.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s define amdgpu_kernel void @hidden_diverge(i32 %n, i32 %a, i32 %b) #0 { diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_loopdiverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/hidden_loopdiverge.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; divergent loop (H
, B) @@ -17,15 +16,15 @@ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %B ] %div.exitx = icmp slt i32 %tid, 0 br i1 %div.exitx, label %X, label %B ; divergent branch -; CHECK: DIVERGENT: %div.exitx = -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: br i1 %div.exitx, B: %uni.inc = add i32 %uni.merge.h, 1 %div.exity = icmp sgt i32 %tid, 0 br i1 %div.exity, label %Y, label %H ; divergent branch -; CHECK: DIVERGENT: %div.exity = -; CHECK: DIVERGENT: br i1 %div.exity, +; CHECK: DIVERGENT: %div.exity = +; CHECK: DIVERGENT: br i1 %div.exity, X: %div.merge.x = phi i32 [ %a, %entry ], [ %uni.merge.h, %H ] ; temporal divergent phi @@ -59,18 +58,18 @@ %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %B ] %div.exitx = icmp slt i32 %tid, 0 br i1 %div.exitx, label %X, label %B ; divergent branch -; CHECK: DIVERGENT: %div.exitx = -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: br i1 %div.exitx, B: %uni.inc = add i32 %uni.merge.h, 1 %div.exity = icmp sgt i32 %tid, 0 br i1 %div.exity, label %Y, label %H ; divergent branch -; CHECK: DIVERGENT: %div.exity = -; CHECK: DIVERGENT: br i1 %div.exity, +; CHECK: DIVERGENT: %div.exity = +; CHECK: DIVERGENT: br i1 %div.exity, X: - %uni.merge.x = phi i32 [ %a, %entry ], [ %b, %H ] + %uni.merge.x = phi i32 [ %a, %entry ], [ %b, %H ] br label %exit Y: @@ -100,10 +99,10 @@ H: %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %D ] br i1 %uni.cond, label %G, label %B -; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: %div.exitx = B: - br i1 %uni.cond, label %X, label %C + br i1 %uni.cond, label %X, label %C C: br i1 %uni.cond, label %Y, label %D @@ -114,7 +113,7 @@ G: br i1 %div.exitx, label %C, label %L -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: br i1 %div.exitx, L: br i1 %uni.cond, label %D, label %G @@ -151,10 +150,10 @@ H: %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %C ] br i1 %uni.cond, label %G, label %B -; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: %div.exitx = B: - br i1 %uni.cond, label %Y, label %C + br i1 %uni.cond, label %Y, label %C C: %uni.inc = add i32 %uni.merge.h, 1 @@ -162,7 +161,7 @@ G: br i1 %div.exitx, label %X, label %L ; two-level break -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: br i1 %div.exitx, L: br i1 %uni.cond, label %C, label %G @@ -193,7 +192,7 @@ H: %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc.d, %D ] br i1 %uni.cond, label %G, label %B -; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: %div.exitx = B: %div.merge.b = phi i32 [ 42, %H ], [ %uni.merge.g, %G ] @@ -203,7 +202,7 @@ G: %uni.merge.g = phi i32 [ 123, %H ], [ %uni.inc.l, %L ] br i1 %div.exitx, label %B, label %L -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: br i1 %div.exitx, L: %uni.inc.l = add i32 %uni.merge.g, 1 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/inline-asm.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/inline-asm.ll @@ -1,5 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print' -disable-output %s 2>&1 | FileCheck %s -; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=tahiti -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx908 -passes='print' -disable-output %s 2>&1 | FileCheck %s ; Make sure nothing crashes on targets with or without AGPRs diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/interp_f16.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/interp_f16.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: for function 'interp_p1_f16' diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/branch-outside.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/branch-outside.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/branch-outside.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-basic.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers-nested.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/diverged-entry-headers.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/exit-divergence.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/exit-divergence.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/exit-divergence.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-1.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-1.ll @@ -1,4 +1,3 @@ -; RUN: opt %s -mtriple amdgcn-- -passes='print' -disable-output 2>&1 | FileCheck %s ; RUN: opt %s -mtriple amdgcn-- -passes='print' -disable-output 2>&1 | FileCheck %s ; This test contains an unstructured loop. diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/irreducible-2.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/irreducible-2.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/reducible-headers.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/irreducible/reducible-headers.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/irreducible/reducible-headers.ll diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-exit.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-exit.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %Guard.bb4 = phi i1 [ true, %bb1 ], [ false, %bb2 ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/join-at-loop-heart.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/join-at-loop-heart.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %phi.h = phi i32 [ 0, %entry ], [ %inc, %C ], [ %inc, %D ], [ %inc, %E ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/kernel-args.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: for function 'test_amdgpu_ps': diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/lit.local.cfg rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/lit.local.cfg diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32( diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-mesa-mesa3d -passes='print' -disable-output %s 2>&1 | FileCheck %s ;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/no-return-blocks.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, ptr addrspace(1) %arg, i64 %tmp2 diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi-undef.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-LABEL: 'test1': diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/propagate-loop-live-out.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/propagate-loop-live-out.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %.126.i355.i = phi i1 [ false, %bb5 ], [ true, %bb4 ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/temporal_diverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/temporal_diverge.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; temporal-divergent use of value carried by divergent loop @@ -17,8 +16,8 @@ %uni.inc = add i32 %uni.merge.h, 1 %div.exitx = icmp slt i32 %tid, 0 br i1 %div.exitx, label %X, label %H ; divergent branch -; CHECK: DIVERGENT: %div.exitx = -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: br i1 %div.exitx, X: %div.user = add i32 %uni.inc, 5 @@ -44,8 +43,8 @@ %uni.inc = add i32 %uni.merge.h, 1 %div.exitx = icmp slt i32 %tid, 0 br i1 %div.exitx, label %X, label %H ; divergent branch -; CHECK: DIVERGENT: %div.exitx = -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: br i1 %div.exitx, X: %div.user = add i32 %uni.inc, 5 @@ -79,9 +78,9 @@ X: %uni.user = add i32 %uni.inc, 5 %div.exity = icmp slt i32 %tid, 0 -; CHECK: DIVERGENT: %div.exity = +; CHECK: DIVERGENT: %div.exity = br i1 %div.exity, label %G, label %Y -; CHECK: DIVERGENT: br i1 %div.exity, +; CHECK: DIVERGENT: br i1 %div.exity, Y: %div.alsouser = add i32 %uni.inc, 5 @@ -105,8 +104,8 @@ %uni.inc = add i32 %uni.merge.h, 1 %div.exitx = icmp slt i32 %tid, 0 br i1 %div.exitx, label %X, label %H ; divergent branch -; CHECK: DIVERGENT: %div.exitx = -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: br i1 %div.exitx, X: br label %G @@ -135,8 +134,8 @@ %uni.inc = add i32 %uni.merge.h, 1 %div.exitx = icmp slt i32 %tid, 0 br i1 %div.exitx, label %X, label %H ; divergent branch -; CHECK: DIVERGENT: %div.exitx = -; CHECK: DIVERGENT: br i1 %div.exitx, +; CHECK: DIVERGENT: %div.exitx = +; CHECK: DIVERGENT: br i1 %div.exitx, X: br label %G diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/trivial-join-at-loop-exit.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/trivial-join-at-loop-exit.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK-NOT: DIVERGENT: %Guard.bb2 = phi i1 [ true, %bb1 ], [ false, %bb0 ] diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %tmp = cmpxchg volatile diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll rename from llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll rename to llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,4 +1,3 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -mtriple amdgcn-unknown-amdhsa -passes='print' -disable-output %s 2>&1 | FileCheck %s declare i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/daorder.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll @@ -1,4 +1,3 @@ -; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s ; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" @@ -14,11 +13,11 @@ ; CHECK: DIVERGENT: br i1 %cond, A: %defAtA = add i32 %n, 1 ; uniform -; CHECK-NOT: DIVERGENT: %defAtA = +; CHECK-NOT: DIVERGENT: %defAtA = br label %C B: %defAtB = add i32 %n, 2 ; uniform -; CHECK-NOT: DIVERGENT: %defAtB = +; CHECK-NOT: DIVERGENT: %defAtB = br label %C C: %defAtC = phi i32 [ %defAtA, %A ], [ %defAtB, %B ] ; divergent diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll @@ -1,4 +1,3 @@ -; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s ; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/hidden_diverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll @@ -1,4 +1,3 @@ -; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s ; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll --- a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/irreducible.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll @@ -1,4 +1,3 @@ -; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s ; RUN: opt %s -passes='print' -disable-output 2>&1 | FileCheck %s ; NOTE: The new pass manager does not fall back on legacy divergence diff --git a/llvm/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg b/llvm/test/Analysis/UniformityAnalysis/NVPTX/lit.local.cfg rename from llvm/test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg rename to llvm/test/Analysis/UniformityAnalysis/NVPTX/lit.local.cfg diff --git a/llvm/test/CodeGen/AMDGPU/always-uniform.ll b/llvm/test/CodeGen/AMDGPU/always-uniform.ll --- a/llvm/test/CodeGen/AMDGPU/always-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/always-uniform.ll @@ -1,19 +1,8 @@ -; RUN: opt -mtriple amdgcn-amdhsa -mcpu=gfx90a -passes=legacy-divergence-analysis < %s -S 2>&1 | FileCheck -check-prefix=OPT %s ; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.readfirstlane(i32) -; OPT-LABEL: define amdgpu_kernel void @readfirstlane_uniform( -; OPT-NEXT: %tid = tail call i32 @llvm.amdgcn.workitem.id.x() -; OPT-NEXT: %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid) -; OPT-NEXT: %idx = zext i32 %scalar to i64 -; OPT-NEXT: %gep0 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %idx -; OPT-NEXT: %val = load float, ptr addrspace(1) %gep0, align 4 -; OPT-NEXT: %gep1 = getelementptr inbounds float, ptr addrspace(1) %1, i64 10 -; OPT-NEXT: store float %val, ptr addrspace(1) %gep1, align 4 -; OPT-NEXT: ret void -; ; GCN-LABEL: readfirstlane_uniform ; GCN: s_load_dwordx4 s[[[IN_ADDR:[0-9]+]]:3], s[4:5], 0x0 ; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0 diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -645,8 +645,7 @@ ; GCN-LABEL: {{^}}smrd_uniform_loop2: -; (this test differs from smrd_uniform_loop by the more complex structure of phis, -; which used to confuse the DivergenceAnalysis after structurization) +; (this test differs from smrd_uniform_loop by the more complex structure of phis) ; ; TODO: we should keep the loop counter in an SGPR and use an S_BUFFER_LOAD ; diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -22,7 +22,6 @@ CGSCCPassManagerTest.cpp ConstraintSystemTest.cpp DDGTest.cpp - DivergenceAnalysisTest.cpp DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp FunctionPropertiesAnalysisTest.cpp diff --git a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp b/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp deleted file mode 100644 --- a/llvm/unittests/Analysis/DivergenceAnalysisTest.cpp +++ /dev/null @@ -1,430 +0,0 @@ -//===- DivergenceAnalysisTest.cpp - DivergenceAnalysis unit tests ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/SyncDependenceAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/SourceMgr.h" -#include "gtest/gtest.h" - -namespace llvm { -namespace { - -BasicBlock *GetBlockByName(StringRef BlockName, Function &F) { - for (auto &BB : F) { - if (BB.getName() != BlockName) - continue; - return &BB; - } - return nullptr; -} - -// We use this fixture to ensure that we clean up DivergenceAnalysisImpl before -// deleting the PassManager. -class DivergenceAnalysisTest : public testing::Test { -protected: - LLVMContext Context; - Module M; - TargetLibraryInfoImpl TLII; - TargetLibraryInfo TLI; - - std::unique_ptr DT; - std::unique_ptr PDT; - std::unique_ptr LI; - std::unique_ptr SDA; - - DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {} - - DivergenceAnalysisImpl buildDA(Function &F, bool IsLCSSA) { - DT.reset(new DominatorTree(F)); - PDT.reset(new PostDominatorTree(F)); - LI.reset(new LoopInfo(*DT)); - SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI)); - return DivergenceAnalysisImpl(F, nullptr, *DT, *LI, *SDA, IsLCSSA); - } - - void runWithDA( - Module &M, StringRef FuncName, bool IsLCSSA, - function_ref - Test) { - auto *F = M.getFunction(FuncName); - ASSERT_NE(F, nullptr) << "Could not find " << FuncName; - DivergenceAnalysisImpl DA = buildDA(*F, IsLCSSA); - Test(*F, *LI, DA); - } -}; - -// Simple initial state test -TEST_F(DivergenceAnalysisTest, DAInitialState) { - IntegerType *IntTy = IntegerType::getInt32Ty(Context); - FunctionType *FTy = - FunctionType::get(Type::getVoidTy(Context), {IntTy}, false); - Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M); - BasicBlock *BB = BasicBlock::Create(Context, "entry", F); - ReturnInst::Create(Context, nullptr, BB); - - DivergenceAnalysisImpl DA = buildDA(*F, false); - - // Whole function region - EXPECT_EQ(DA.getRegionLoop(), nullptr); - - // No divergence in initial state - EXPECT_FALSE(DA.hasDetectedDivergence()); - - // No spurious divergence - DA.compute(); - EXPECT_FALSE(DA.hasDetectedDivergence()); - - // Detected divergence after marking - Argument &arg = *F->arg_begin(); - DA.markDivergent(arg); - - EXPECT_TRUE(DA.hasDetectedDivergence()); - EXPECT_TRUE(DA.isDivergent(arg)); - - DA.compute(); - EXPECT_TRUE(DA.hasDetectedDivergence()); - EXPECT_TRUE(DA.isDivergent(arg)); -} - -TEST_F(DivergenceAnalysisTest, DANoLCSSA) { - LLVMContext C; - SMDiagnostic Err; - - std::unique_ptr M = parseAssemblyString( - "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" " - " " - "define i32 @f_1(i8* nocapture %arr, i32 %n, i32* %A, i32* %B) " - " local_unnamed_addr { " - "entry: " - " br label %loop.ph " - " " - "loop.ph: " - " br label %loop " - " " - "loop: " - " %iv0 = phi i32 [ %iv0.inc, %loop ], [ 0, %loop.ph ] " - " %iv1 = phi i32 [ %iv1.inc, %loop ], [ -2147483648, %loop.ph ] " - " %iv0.inc = add i32 %iv0, 1 " - " %iv1.inc = add i32 %iv1, 3 " - " %cond.cont = icmp slt i32 %iv0, %n " - " br i1 %cond.cont, label %loop, label %for.end.loopexit " - " " - "for.end.loopexit: " - " ret i32 %iv0 " - "} ", - Err, C); - - Function *F = M->getFunction("f_1"); - DivergenceAnalysisImpl DA = buildDA(*F, false); - EXPECT_FALSE(DA.hasDetectedDivergence()); - - auto ItArg = F->arg_begin(); - ItArg++; - auto &NArg = *ItArg; - - // Seed divergence in argument %n - DA.markDivergent(NArg); - - DA.compute(); - EXPECT_TRUE(DA.hasDetectedDivergence()); - - // Verify that "ret %iv.0" is divergent - auto ItBlock = F->begin(); - std::advance(ItBlock, 3); - auto &ExitBlock = *GetBlockByName("for.end.loopexit", *F); - auto &RetInst = *cast(ExitBlock.begin()); - EXPECT_TRUE(DA.isDivergent(RetInst)); -} - -TEST_F(DivergenceAnalysisTest, DALCSSA) { - LLVMContext C; - SMDiagnostic Err; - - std::unique_ptr M = parseAssemblyString( - "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" " - " " - "define i32 @f_lcssa(i8* nocapture %arr, i32 %n, i32* %A, i32* %B) " - " local_unnamed_addr { " - "entry: " - " br label %loop.ph " - " " - "loop.ph: " - " br label %loop " - " " - "loop: " - " %iv0 = phi i32 [ %iv0.inc, %loop ], [ 0, %loop.ph ] " - " %iv1 = phi i32 [ %iv1.inc, %loop ], [ -2147483648, %loop.ph ] " - " %iv0.inc = add i32 %iv0, 1 " - " %iv1.inc = add i32 %iv1, 3 " - " %cond.cont = icmp slt i32 %iv0, %n " - " br i1 %cond.cont, label %loop, label %for.end.loopexit " - " " - "for.end.loopexit: " - " %val.ret = phi i32 [ %iv0, %loop ] " - " br label %detached.return " - " " - "detached.return: " - " ret i32 %val.ret " - "} ", - Err, C); - - Function *F = M->getFunction("f_lcssa"); - DivergenceAnalysisImpl DA = buildDA(*F, true); - EXPECT_FALSE(DA.hasDetectedDivergence()); - - auto ItArg = F->arg_begin(); - ItArg++; - auto &NArg = *ItArg; - - // Seed divergence in argument %n - DA.markDivergent(NArg); - - DA.compute(); - EXPECT_TRUE(DA.hasDetectedDivergence()); - - // Verify that "ret %iv.0" is divergent - auto ItBlock = F->begin(); - std::advance(ItBlock, 4); - auto &ExitBlock = *GetBlockByName("detached.return", *F); - auto &RetInst = *cast(ExitBlock.begin()); - EXPECT_TRUE(DA.isDivergent(RetInst)); -} - -TEST_F(DivergenceAnalysisTest, DAJoinDivergence) { - LLVMContext C; - SMDiagnostic Err; - - std::unique_ptr M = parseAssemblyString( - "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" " - " " - "define void @f_1(i1 %a, i1 %b, i1 %c) " - " local_unnamed_addr { " - "A: " - " br i1 %a, label %B, label %C " - " " - "B: " - " br i1 %b, label %C, label %D " - " " - "C: " - " %c.join = phi i32 [ 0, %A ], [ 1, %B ] " - " br i1 %c, label %D, label %E " - " " - "D: " - " %d.join = phi i32 [ 0, %B ], [ 1, %C ] " - " br label %E " - " " - "E: " - " %e.join = phi i32 [ 0, %C ], [ 1, %D ] " - " ret void " - "} " - " " - "define void @f_2(i1 %a, i1 %b, i1 %c) " - " local_unnamed_addr { " - "A: " - " br i1 %a, label %B, label %E " - " " - "B: " - " br i1 %b, label %C, label %D " - " " - "C: " - " br label %D " - " " - "D: " - " %d.join = phi i32 [ 0, %B ], [ 1, %C ] " - " br label %E " - " " - "E: " - " %e.join = phi i32 [ 0, %A ], [ 1, %D ] " - " ret void " - "} " - " " - "define void @f_3(i1 %a, i1 %b, i1 %c)" - " local_unnamed_addr { " - "A: " - " br i1 %a, label %B, label %C " - " " - "B: " - " br label %C " - " " - "C: " - " %c.join = phi i32 [ 0, %A ], [ 1, %B ] " - " br i1 %c, label %D, label %E " - " " - "D: " - " br label %E " - " " - "E: " - " %e.join = phi i32 [ 0, %C ], [ 1, %D ] " - " ret void " - "} ", - Err, C); - - // Maps divergent conditions to the basic blocks whose Phi nodes become - // divergent. Blocks need to be listed in IR order. - using SmallBlockVec = SmallVector; - using InducedDivJoinMap = std::map; - - // Actual function performing the checks. - auto CheckDivergenceFunc = [this](Function &F, - InducedDivJoinMap &ExpectedDivJoins) { - for (auto &ItCase : ExpectedDivJoins) { - auto *DivVal = ItCase.first; - auto DA = buildDA(F, false); - DA.markDivergent(*DivVal); - DA.compute(); - - // List of basic blocks that shall host divergent Phi nodes. - auto ItDivJoins = ItCase.second.begin(); - - for (auto &BB : F) { - auto *Phi = dyn_cast(BB.begin()); - if (!Phi) - continue; - - if (ItDivJoins != ItCase.second.end() && &BB == *ItDivJoins) { - EXPECT_TRUE(DA.isDivergent(*Phi)); - // Advance to next block with expected divergent PHI node. - ++ItDivJoins; - } else { - EXPECT_FALSE(DA.isDivergent(*Phi)); - } - } - } - }; - - { - auto *F = M->getFunction("f_1"); - auto ItBlocks = F->begin(); - ItBlocks++; // Skip A - ItBlocks++; // Skip B - auto *C = &*ItBlocks++; - auto *D = &*ItBlocks++; - auto *E = &*ItBlocks; - - auto ItArg = F->arg_begin(); - auto *AArg = &*ItArg++; - auto *BArg = &*ItArg++; - auto *CArg = &*ItArg; - - InducedDivJoinMap DivJoins; - DivJoins.emplace(AArg, SmallBlockVec({C, D, E})); - DivJoins.emplace(BArg, SmallBlockVec({D, E})); - DivJoins.emplace(CArg, SmallBlockVec({E})); - - CheckDivergenceFunc(*F, DivJoins); - } - - { - auto *F = M->getFunction("f_2"); - auto ItBlocks = F->begin(); - ItBlocks++; // Skip A - ItBlocks++; // Skip B - ItBlocks++; // Skip C - auto *D = &*ItBlocks++; - auto *E = &*ItBlocks; - - auto ItArg = F->arg_begin(); - auto *AArg = &*ItArg++; - auto *BArg = &*ItArg++; - auto *CArg = &*ItArg; - - InducedDivJoinMap DivJoins; - DivJoins.emplace(AArg, SmallBlockVec({E})); - DivJoins.emplace(BArg, SmallBlockVec({D})); - DivJoins.emplace(CArg, SmallBlockVec({})); - - CheckDivergenceFunc(*F, DivJoins); - } - - { - auto *F = M->getFunction("f_3"); - auto ItBlocks = F->begin(); - ItBlocks++; // Skip A - ItBlocks++; // Skip B - auto *C = &*ItBlocks++; - ItBlocks++; // Skip D - auto *E = &*ItBlocks; - - auto ItArg = F->arg_begin(); - auto *AArg = &*ItArg++; - auto *BArg = &*ItArg++; - auto *CArg = &*ItArg; - - InducedDivJoinMap DivJoins; - DivJoins.emplace(AArg, SmallBlockVec({C})); - DivJoins.emplace(BArg, SmallBlockVec({})); - DivJoins.emplace(CArg, SmallBlockVec({E})); - - CheckDivergenceFunc(*F, DivJoins); - } -} - -TEST_F(DivergenceAnalysisTest, DASwitchUnreachableDefault) { - LLVMContext C; - SMDiagnostic Err; - - std::unique_ptr M = parseAssemblyString( - "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" " - " " - "define void @switch_unreachable_default(i32 %cond) local_unnamed_addr { " - "entry: " - " switch i32 %cond, label %sw.default [ " - " i32 0, label %sw.bb0 " - " i32 1, label %sw.bb1 " - " ] " - " " - "sw.bb0: " - " br label %sw.epilog " - " " - "sw.bb1: " - " br label %sw.epilog " - " " - "sw.default: " - " unreachable " - " " - "sw.epilog: " - " %div.dbl = phi double [ 0.0, %sw.bb0], [ -1.0, %sw.bb1 ] " - " ret void " - "}", - Err, C); - - auto *F = M->getFunction("switch_unreachable_default"); - auto &CondArg = *F->arg_begin(); - auto DA = buildDA(*F, false); - - EXPECT_FALSE(DA.hasDetectedDivergence()); - - DA.markDivergent(CondArg); - DA.compute(); - - // Still %CondArg is divergent. - EXPECT_TRUE(DA.hasDetectedDivergence()); - - // The join uni.dbl is not divergent (see D52221) - auto &ExitBlock = *GetBlockByName("sw.epilog", *F); - auto &DivDblPhi = *cast(ExitBlock.begin()); - EXPECT_TRUE(DA.isDivergent(DivDblPhi)); -} - -} // end anonymous namespace -} // end namespace llvm diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -45,7 +45,6 @@ "DependenceAnalysis.cpp", "DependenceGraphBuilder.cpp", "DevelopmentModeInlineAdvisor.cpp", - "DivergenceAnalysis.cpp", "DomPrinter.cpp", "DomTreeUpdater.cpp", "DominanceFrontier.cpp", @@ -72,7 +71,6 @@ "LazyBranchProbabilityInfo.cpp", "LazyCallGraph.cpp", "LazyValueInfo.cpp", - "LegacyDivergenceAnalysis.cpp", "Lint.cpp", "Loads.cpp", "Local.cpp", @@ -118,7 +116,6 @@ "ScopedNoAliasAA.cpp", "StackLifetime.cpp", "StackSafetyAnalysis.cpp", - "SyncDependenceAnalysis.cpp", "SyntheticCountsUtils.cpp", "TFLiteUtils.cpp", "TargetLibraryInfo.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -24,7 +24,6 @@ "CaptureTrackingTest.cpp", "ConstraintSystemTest.cpp", "DDGTest.cpp", - "DivergenceAnalysisTest.cpp", "DomTreeUpdaterTest.cpp", "FunctionPropertiesAnalysisTest.cpp", "GlobalsModRefTest.cpp",