Index: include/llvm/Analysis/KernelDivergenceAnalysis.h =================================================================== --- include/llvm/Analysis/KernelDivergenceAnalysis.h +++ include/llvm/Analysis/KernelDivergenceAnalysis.h @@ -1,4 +1,4 @@ -//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// +//===- llvm/Analysis/KernelDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// The divergence analysis is an LLVM pass which can be used to find out -// if a branch instruction in a GPU program is divergent or not. It can help +// The kernel divergence analysis is an LLVM pass which can be used to find out +// if a branch instruction in a GPU program (kernel) is divergent or not. It can help // branch optimizations such as jump threading and loop unswitching to make // better decisions. // @@ -22,12 +22,12 @@ namespace llvm { class Value; -class DivergenceAnalysis : public FunctionPass { +class KernelDivergenceAnalysis : public FunctionPass { public: static char ID; - DivergenceAnalysis() : FunctionPass(ID) { - initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + KernelDivergenceAnalysis() : FunctionPass(ID) { + initializeKernelDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override; Index: include/llvm/Analysis/Passes.h =================================================================== --- include/llvm/Analysis/Passes.h +++ include/llvm/Analysis/Passes.h @@ -61,10 +61,10 @@ //===--------------------------------------------------------------------===// // - // createDivergenceAnalysisPass - This pass determines which branches in a GPU + // createKernelDivergenceAnalysisPass - This pass determines which branches in a GPU // program are divergent. // - FunctionPass *createDivergenceAnalysisPass(); + FunctionPass *createKernelDivergenceAnalysisPass(); //===--------------------------------------------------------------------===// // Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -289,7 +289,7 @@ /// Returns whether V is a source of divergence. /// /// This function provides the target-dependent information for - /// the target-independent DivergenceAnalysis. DivergenceAnalysis first + /// the target-independent KernelDivergenceAnalysis. KernelDivergenceAnalysis first /// builds the dependency graph, and then runs the reachability algorithm /// starting with the sources of divergence. bool isSourceOfDivergence(const Value *V) const; Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -28,7 +28,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -229,7 +229,7 @@ LLVMContext *Context; CodeGenOpt::Level OptLevel; - DivergenceAnalysis * DA = nullptr; + KernelDivergenceAnalysis * DA = nullptr; FunctionLoweringInfo * FLI = nullptr; /// The function-level optimization remark emitter. Used to emit remarks @@ -382,7 +382,7 @@ /// Prepare this SelectionDAG to process code in the given MachineFunction. void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - DivergenceAnalysis * Divergence); + KernelDivergenceAnalysis * Divergence); void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) { FLI = FuncInfo; Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -29,7 +29,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -2648,7 +2648,7 @@ virtual bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, - DivergenceAnalysis *DA) const { + KernelDivergenceAnalysis *DA) const { return false; } Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -118,6 +118,7 @@ void initializeDependenceAnalysisPass(PassRegistry&); void initializeDependenceAnalysisWrapperPassPass(PassRegistry&); void initializeDetectDeadLanesPass(PassRegistry&); +void initializeKernelDivergenceAnalysisPass(PassRegistry&); void initializeDivRemPairsLegacyPassPass(PassRegistry&); void initializeDivergenceAnalysisPass(PassRegistry&); void initializeDomOnlyPrinterPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -94,7 +94,7 @@ (void) llvm::createDeadInstEliminationPass(); (void) llvm::createDeadStoreEliminationPass(); (void) llvm::createDependenceAnalysisWrapperPass(); - (void) llvm::createDivergenceAnalysisPass(); + (void) llvm::createKernelDivergenceAnalysisPass(); (void) llvm::createDomOnlyPrinterPass(); (void) llvm::createDomPrinterPass(); (void) llvm::createDomOnlyViewerPass(); Index: lib/Analysis/Analysis.cpp =================================================================== --- lib/Analysis/Analysis.cpp +++ lib/Analysis/Analysis.cpp @@ -39,7 +39,7 @@ initializeDependenceAnalysisWrapperPassPass(Registry); initializeDelinearizationPass(Registry); initializeDemandedBitsWrapperPassPass(Registry); - initializeDivergenceAnalysisPass(Registry); + initializeKernelDivergenceAnalysisPass(Registry); initializeDominanceFrontierWrapperPassPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -25,7 +25,6 @@ Delinearization.cpp DemandedBits.cpp DependenceAnalysis.cpp - DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp EHPersonalities.cpp @@ -38,6 +37,7 @@ Interval.cpp IntervalPartition.cpp IteratedDominanceFrontier.cpp + KernelDivergenceAnalysis.cpp LazyBranchProbabilityInfo.cpp LazyBlockFrequencyInfo.cpp LazyCallGraph.cpp Index: lib/Analysis/KernelDivergenceAnalysis.cpp =================================================================== --- lib/Analysis/KernelDivergenceAnalysis.cpp +++ lib/Analysis/KernelDivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// +//===- KernelDivergenceAnalysis.cpp --------- Kernel Divergence Analysis Implementation -==// // // The LLVM Compiler Infrastructure // @@ -64,7 +64,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -265,25 +265,25 @@ } /// end namespace anonymous // Register this pass. -char DivergenceAnalysis::ID = 0; -INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", +char KernelDivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(KernelDivergenceAnalysis, "divergence", "Kernel Divergence Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", +INITIALIZE_PASS_END(KernelDivergenceAnalysis, "divergence", "Kernel Divergence Analysis", false, true) -FunctionPass *llvm::createDivergenceAnalysisPass() { - return new DivergenceAnalysis(); +FunctionPass *llvm::createKernelDivergenceAnalysisPass() { + return new KernelDivergenceAnalysis(); } -void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { +void KernelDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); } -bool DivergenceAnalysis::runOnFunction(Function &F) { +bool KernelDivergenceAnalysis::runOnFunction(Function &F) { auto *TTIWP = getAnalysisIfAvailable(); if (TTIWP == nullptr) return false; @@ -308,7 +308,7 @@ return false; } -void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { +void KernelDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.empty()) return; const Value *FirstDivergentValue = *DivergentValues.begin(); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -984,7 +984,7 @@ void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - DivergenceAnalysis * Divergence) { + KernelDivergenceAnalysis * Divergence) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -417,7 +417,7 @@ SplitCriticalSideEffectEdges(const_cast(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable()); + getAnalysisIfAvailable()); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. Index: lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -16,7 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUIntrinsicInfo.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/IR/IRBuilder.h" @@ -32,7 +32,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, public InstVisitor { - DivergenceAnalysis *DA; + KernelDivergenceAnalysis *DA; MemoryDependenceResults *MDR; LoopInfo *LI; DenseMap noClobberClones; @@ -49,7 +49,7 @@ return "AMDGPU Annotate Uniform Values"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); @@ -64,7 +64,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, "Add AMDGPU uniform metadata", false, false) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, @@ -176,7 +176,7 @@ if (skipFunction(F)) return false; - DA = &getAnalysis(); + DA = &getAnalysis(); MDR = &getAnalysis().getMemDep(); LI = &getAnalysis().getLoopInfo(); isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -18,7 +18,7 @@ #include "AMDGPUTargetMachine.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" @@ -60,7 +60,7 @@ public InstVisitor { const GCNSubtarget *ST = nullptr; AssumptionCache *AC = nullptr; - DivergenceAnalysis *DA = nullptr; + KernelDivergenceAnalysis *DA = nullptr; Module *Mod = nullptr; bool HasUnsafeFPMath = false; AMDGPUAS AMDGPUASI; @@ -177,7 +177,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } }; @@ -898,7 +898,7 @@ const AMDGPUTargetMachine &TM = TPC->getTM(); ST = &TM.getSubtarget(F); AC = &getAnalysis().getAssumptionCache(F); - DA = &getAnalysis(); + DA = &getAnalysis(); HasUnsafeFPMath = hasUnsafeFPMath(F); AMDGPUASI = TM.getAMDGPUAS(); @@ -918,7 +918,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations", false, false) Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -29,7 +29,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -87,7 +87,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); SelectionDAGISel::getAnalysisUsage(AU); } @@ -257,7 +257,7 @@ "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) Index: lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -16,7 +16,6 @@ #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" Index: lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -25,7 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" @@ -70,7 +70,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) @@ -78,10 +78,10 @@ // TODO: Preserve dominator tree. AU.addRequired(); - AU.addRequired(); + AU.addRequired(); // No divergent values are changed, only blocks and branch edges. - AU.addPreserved(); + AU.addPreserved(); // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); @@ -95,7 +95,7 @@ /// \returns true if \p BB is reachable through only uniform branches. /// XXX - Is there a more efficient way to find this? -static bool isUniformlyReached(const DivergenceAnalysis &DA, +static bool isUniformlyReached(const KernelDivergenceAnalysis &DA, BasicBlock &BB) { SmallVector Stack; SmallPtrSet Visited; @@ -163,7 +163,7 @@ if (PDT.getRoots().size() <= 1) return false; - DivergenceAnalysis &DA = getAnalysis(); + KernelDivergenceAnalysis &DA = getAnalysis(); // Loop over all of the blocks in a function, tracking all of the blocks that // return. Index: lib/Target/AMDGPU/SIAnnotateControlFlow.cpp =================================================================== --- lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" @@ -52,7 +52,7 @@ using StackVector = SmallVector; class SIAnnotateControlFlow : public FunctionPass { - DivergenceAnalysis *DA; + KernelDivergenceAnalysis *DA; Type *Boolean; Type *Void; @@ -116,7 +116,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); } @@ -127,7 +127,7 @@ INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE, "Annotate SI Control Flow", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE, "Annotate SI Control Flow", false, false) @@ -386,7 +386,7 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); - DA = &getAnalysis(); + DA = &getAnalysis(); for (df_iterator I = df_begin(&F.getEntryBlock()), E = df_end(&F.getEntryBlock()); I != E; ++I) { Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -324,7 +324,7 @@ unsigned Depth = 0) const override; bool isSDNodeSourceOfDivergence(const SDNode *N, - FunctionLoweringInfo *FLI, DivergenceAnalysis *DA) const override; + FunctionLoweringInfo *FLI, KernelDivergenceAnalysis *DA) const override; bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth = 5) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -8486,7 +8486,7 @@ } bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N, - FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const + FunctionLoweringInfo * FLI, KernelDivergenceAnalysis * KDA) const { switch (N->getOpcode()) { case ISD::Register: @@ -8519,7 +8519,7 @@ else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv())) return true; } - return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg)); + return !KDA || KDA->isDivergent(FLI->getValueFromVirtualReg(Reg)); } } break; Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -33,7 +33,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -215,7 +215,7 @@ AU.addRequired(); AU.addRequired(); if (hasBranchDivergence) - AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -383,7 +383,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) @@ -864,7 +864,7 @@ return false; } if (hasBranchDivergence && - getAnalysis().isDivergent(LoopCond)) { + getAnalysis().isDivergent(LoopCond)) { LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << " at non-trivial condition '" << *Val Index: lib/Transforms/Scalar/StructurizeCFG.cpp =================================================================== --- lib/Transforms/Scalar/StructurizeCFG.cpp +++ lib/Transforms/Scalar/StructurizeCFG.cpp @@ -13,7 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/KernelDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" @@ -183,7 +183,7 @@ Function *Func; Region *ParentRegion; - DivergenceAnalysis *DA; + KernelDivergenceAnalysis *DA; DominatorTree *DT; LoopInfo *LI; @@ -269,7 +269,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { if (SkipUniformRegions) - AU.addRequired(); + AU.addRequired(); AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addRequired(); @@ -285,7 +285,7 @@ INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG", false, false) -INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(KernelDivergenceAnalysis) INITIALIZE_PASS_DEPENDENCY(LowerSwitch) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) @@ -914,7 +914,7 @@ } static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, - const DivergenceAnalysis &DA) { + const KernelDivergenceAnalysis &DA) { for (auto E : R->elements()) { if (!E->isSubRegion()) { auto Br = dyn_cast(E->getEntry()->getTerminator()); @@ -962,7 +962,7 @@ // but we shouldn't rely on metadata for correctness! unsigned UniformMDKindID = R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); - DA = &getAnalysis(); + DA = &getAnalysis(); if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R Index: test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -1,45 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s - -; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst -define i32 @test1(i32* %ptr, i32 %val) #0 { - %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst - ret i32 %orig -} - -; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst -define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) { - %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst - ret {i32, i1} %orig -} - -; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) -define i32 @test_atomic_inc_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { - %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) - ret i32 %ret -} - -; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false) -define i64 @test_atomic_inc_i64(i64 addrspace(1)* %ptr, i64 %val) #0 { - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false) - ret i64 %ret -} - -; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) -define i32 @test_atomic_dec_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { - %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) - ret i32 %ret -} - -; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false) -define i64 @test_atomic_dec_i64(i64 addrspace(1)* %ptr, i64 %val) #0 { - %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val, i32 0, i32 0, i1 false) - ret i64 %ret -} - -declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1 -declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1 -declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1 -declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind argmemonly } Index: test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll @@ -1,13 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s - -; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 -define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { - %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 - store i32 %swizzle, i32 addrspace(1)* %out, align 4 - ret void -} - -declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1 - -attributes #0 = { nounwind convergent } -attributes #1 = { nounwind readnone convergent } Index: test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg @@ -1,2 +0,0 @@ -if not 'AMDGPU' in config.root.targets: - config.unsupported = True Index: test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -1,103 +0,0 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap( -define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.add( -define float @buffer_atomic_add(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.sub( -define float @buffer_atomic_sub(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smin( -define float @buffer_atomic_smin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.smin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umin( -define float @buffer_atomic_umin(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.umin(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.smax( -define float @buffer_atomic_smax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.smax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.umax( -define float @buffer_atomic_umax(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.umax(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.and( -define float @buffer_atomic_and(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.and(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.or( -define float @buffer_atomic_or(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.or(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.xor( -define float @buffer_atomic_xor(<4 x i32> inreg %rsrc, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.xor(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap( -define float @buffer_atomic_cmpswap(<4 x i32> inreg %rsrc, i32 inreg %data, i32 inreg %cmp) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0) - %r = bitcast i32 %orig to float - ret float %r -} - -declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0 -declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0 - -attributes #0 = { nounwind } Index: test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll @@ -1,131 +0,0 @@ -;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32( -define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32( -define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32( -define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32( -define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32( -define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32( -define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32( -define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32( -define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32( -define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32( -define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32( -define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32( -define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32( -define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32( -define float @image_atomic_add_2d(<8 x i32> inreg %rsrc, i32 inreg %s, i32 inreg %t, i32 inreg %data) #0 { -main_body: - %orig = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) - %r = bitcast i32 %orig to float - ret float %r -} - -declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0 -declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 - -declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 - -attributes #0 = { nounwind } Index: test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/no-return-blocks.ll @@ -1,30 +0,0 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s - -; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 -; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 -; CHECK: DIVERGENT: %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4 - -; The post dominator tree does not have a root node in this case -define amdgpu_kernel void @no_return_blocks(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1) #0 { -bb0: - %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 - %tmp2 = sext i32 %tmp to i64 - %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2 - %tmp6 = load volatile float, float addrspace(1)* %tmp5, align 4 - %tmp8 = fcmp olt float %tmp6, 0.000000e+00 - br i1 %tmp8, label %bb1, label %bb2 - -bb1: - %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4 - br label %bb2 - -bb2: - %tmp11 = load volatile float, float addrspace(1)* %tmp5, align 4 - br label %bb1 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.amdgcn.workitem.id.x() #1 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } Index: test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll @@ -1,31 +0,0 @@ -; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s - -; CHECK-LABEL: 'test1': -; CHECK-NEXT: DIVERGENT: i32 %bound -; CHECK: {{^ *}}%counter = -; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound -; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body -; CHECK: {{^ *}}%counter.next = -; CHECK: {{^ *}}%counter.footer = -; CHECK: DIVERGENT: br i1 %break, label %end, label %header -; Note: %counter is not divergent! -define amdgpu_ps void @test1(i32 %bound) { -entry: - br label %header - -header: - %counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ] - %break = icmp sge i32 %counter, %bound - br i1 %break, label %footer, label %body - -body: - %counter.next = add i32 %counter, 1 - br label %footer - -footer: - %counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ] - br i1 %break, label %end, label %header - -end: - ret void -} Index: test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/unreachable-loop-block.ll @@ -1,17 +0,0 @@ -; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s - -; CHECK: DIVERGENT: %tmp = cmpxchg volatile -define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 { -entry: - unreachable - -unreachable_loop: ; preds = %do.body.i, %if.then11 - %tmp = cmpxchg volatile i32 addrspace(1)* null, i32 0, i32 0 seq_cst seq_cst - %cmp.i = extractvalue { i32, i1 } %tmp, 1 - br i1 %cmp.i, label %unreachable_loop, label %end - -end: ; preds = %do.body.i51, %atomicAdd_g_f.exit - unreachable -} - -attributes #0 = { norecurse nounwind } Index: test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/workitem-intrinsics.ll @@ -1,45 +0,0 @@ -; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence %s | FileCheck %s - -declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i32 @llvm.amdgcn.workitem.id.y() #0 -declare i32 @llvm.amdgcn.workitem.id.z() #0 -declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 -declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 - -; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @workitem_id_x() #1 { - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id.x, i32 addrspace(1)* undef - ret void -} - -; CHECK: DIVERGENT: %id.y = call i32 @llvm.amdgcn.workitem.id.y() -define amdgpu_kernel void @workitem_id_y() #1 { - %id.y = call i32 @llvm.amdgcn.workitem.id.y() - store volatile i32 %id.y, i32 addrspace(1)* undef - ret void -} - -; CHECK: DIVERGENT: %id.z = call i32 @llvm.amdgcn.workitem.id.z() -define amdgpu_kernel void @workitem_id_z() #1 { - %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.z, i32 addrspace(1)* undef - ret void -} - -; CHECK: DIVERGENT: %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0) -define amdgpu_kernel void @mbcnt_lo() #1 { - %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 0, i32 0) - store volatile i32 %mbcnt.lo, i32 addrspace(1)* undef - ret void -} - -; CHECK: DIVERGENT: %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0) -define amdgpu_kernel void @mbcnt_hi() #1 { - %mbcnt.hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 0, i32 0) - store volatile i32 %mbcnt.hi, i32 addrspace(1)* undef - ret void -} - -attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } Index: test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/NVPTX/lit.local.cfg @@ -1,2 +0,0 @@ -if not 'NVPTX' in config.root.targets: - config.unsupported = True Index: test/Analysis/KernelDivergenceAnalysis/AMDGPU/kernel-args.ll =================================================================== --- test/Analysis/KernelDivergenceAnalysis/AMDGPU/kernel-args.ll +++ test/Analysis/KernelDivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,6 +1,6 @@ ; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_ps': +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: ; CHECK-NOT: %arg0 ; CHECK-NOT: %arg1 @@ -14,7 +14,7 @@ ret void } -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_kernel': +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'test_amdgpu_kernel': ; CHECK-NOT: %arg0 ; CHECK-NOT: %arg1 ; CHECK-NOT: %arg2 @@ -26,7 +26,7 @@ ret void } -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_c': +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'test_c': ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: ; CHECK: DIVERGENT: Index: test/Analysis/KernelDivergenceAnalysis/NVPTX/diverge.ll =================================================================== --- test/Analysis/KernelDivergenceAnalysis/NVPTX/diverge.ll +++ test/Analysis/KernelDivergenceAnalysis/NVPTX/diverge.ll @@ -5,7 +5,7 @@ ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'no_diverge' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'no_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() %cond = icmp slt i32 %n, 0 @@ -27,7 +27,7 @@ ; c = b; ; return c; // c is divergent: sync dependent define i32 @sync(i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'sync' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'sync' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() %cond = icmp slt i32 %tid, 5 @@ -48,7 +48,7 @@ ; // c here is divergent because it is sync dependent on threadIdx.x >= 5 ; return c; define i32 @mixed(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'mixed' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'mixed' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() %cond = icmp slt i32 %tid, 5 @@ -73,7 +73,7 @@ ; We conservatively treats all parameters of a __device__ function as divergent. define i32 @device(i32 %n, i32 %a, i32 %b) { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'device' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'device' ; CHECK: DIVERGENT: i32 %n ; CHECK: DIVERGENT: i32 %a ; CHECK: DIVERGENT: i32 %b @@ -98,7 +98,7 @@ ; ; The i defined in the loop is used outside. define i32 @loop() { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'loop' entry: %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() br label %loop @@ -120,7 +120,7 @@ ; Same as @loop, but the loop is in the LCSSA form. define i32 @lcssa() { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'lcssa' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'lcssa' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() br label %loop @@ -156,7 +156,7 @@ ; if (i3 == 5) // divergent ; because sync dependent on (tid / i3). define i32 @unstructured_loop(i1 %entry_cond) { -; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'unstructured_loop' +; CHECK-LABEL: Printing analysis 'Kernel Divergence Analysis' for function 'unstructured_loop' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() br i1 %entry_cond, label %loop_entry_1, label %loop_entry_2