Index: include/llvm/Analysis/DivergenceAnalysis.h =================================================================== --- /dev/null +++ include/llvm/Analysis/DivergenceAnalysis.h @@ -0,0 +1,90 @@ +//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines divergence analysis which determines whether a branch in a +// GPU program is divergent. It can help branch optimizations such as jump +// threading and loop unswitching to make better decisions. +// +// GPU programs typically use the SIMD execution model, where multiple threads +// in the same execution group have to execute in lock-step. Therefore, if the +// code contains divergent branches (i.e., threads in a group do not agree on +// which path of the branch to take), the group of threads has to execute all +// the paths from that branch with different subsets of threads enabled until +// they converge at the immediately post-dominating BB of the paths. +// +// Due to this execution model, some optimizations such as jump +// threading and loop unswitching can be unfortunately harmful when performed on +// divergent branches. Therefore, an analysis that computes which branches in a +// GPU program are divergent can help the compiler to selectively run these +// optimizations. +// +// This file defines divergence analysis which computes a conservative but +// non-trivial approximation of all divergent branches in a GPU program. It +// partially implements the approach described in +// +// Divergence Analysis +// Sampaio, Souza, Collange, Pereira +// TOPLAS '13 +// +// The divergence analysis identifies the sources of divergence (e.g., special +// variables that hold the thread ID), and recursively marks variables that are +// data or sync dependent on a source of divergence as divergent. +// +// While data dependency is a well-known concept, the notion of sync dependency +// is worth more explanation. Sync dependence characterizes the control flow +// aspect of the propagation of branch divergence. For example, +// +// %cond = icmp slt i32 %tid, 10 +// br i1 %cond, label %then, label %else +// then: +// br label %merge +// else: +// br label %merge +// merge: +// %a = phi i32 [ 0, %then ], [ 1, %else ] +// +// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid +// because %tid is not on its use-def chains, %a is sync dependent on %tid +// because the branch "br i1 %cond" depends on %tid and affects which value %a +// is assigned to. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" + +namespace llvm { +class Value; +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet DivergentValues; +}; +} // End llvm Index: lib/Analysis/DivergenceAnalysis.cpp =================================================================== --- lib/Analysis/DivergenceAnalysis.cpp +++ lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// // // The LLVM Compiler Infrastructure // @@ -7,52 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines divergence analysis which determines whether a branch in a -// GPU program is divergent. It can help branch optimizations such as jump -// threading and loop unswitching to make better decisions. -// -// GPU programs typically use the SIMD execution model, where multiple threads -// in the same execution group have to execute in lock-step. Therefore, if the -// code contains divergent branches (i.e., threads in a group do not agree on -// which path of the branch to take), the group of threads has to execute all -// the paths from that branch with different subsets of threads enabled until -// they converge at the immediately post-dominating BB of the paths. -// -// Due to this execution model, some optimizations such as jump -// threading and loop unswitching can be unfortunately harmful when performed on -// divergent branches. Therefore, an analysis that computes which branches in a -// GPU program are divergent can help the compiler to selectively run these -// optimizations. -// -// This file defines divergence analysis which computes a conservative but -// non-trivial approximation of all divergent branches in a GPU program. It -// partially implements the approach described in -// -// Divergence Analysis -// Sampaio, Souza, Collange, Pereira -// TOPLAS '13 -// -// The divergence analysis identifies the sources of divergence (e.g., special -// variables that hold the thread ID), and recursively marks variables that are -// data or sync dependent on a source of divergence as divergent. -// -// While data dependency is a well-known concept, the notion of sync dependency -// is worth more explanation. Sync dependence characterizes the control flow -// aspect of the propagation of branch divergence. For example, -// -// %cond = icmp slt i32 %tid, 10 -// br i1 %cond, label %then, label %else -// then: -// br label %merge -// else: -// br label %merge -// merge: -// %a = phi i32 [ 0, %then ], [ 1, %else ] -// -// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid -// because %tid is not on its use-def chains, %a is sync dependent on %tid -// because the branch "br i1 %cond" depends on %tid and affects which value %a -// is assigned to. +// This file implements divergence analysis which determines whether a branch +// in a GPU program is divergent. // // The current implementation has the following limitations: // 1. intra-procedural. It conservatively considers the arguments of a @@ -61,75 +17,31 @@ // 2. memory as black box. It conservatively considers values loaded from // generic or local address as divergent. This can be improved by leveraging // pointer analysis. +// //===----------------------------------------------------------------------===// -#include -#include "llvm/IR/Dominators.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include using namespace llvm; -#define DEBUG_TYPE "divergence" - -namespace { -class DivergenceAnalysis : public FunctionPass { -public: - static char ID; - - DivergenceAnalysis() : FunctionPass(ID) { - initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.setPreservesAll(); - } - - bool runOnFunction(Function &F) override; - - // Print all divergent branches in the function. - void print(raw_ostream &OS, const Module *) const override; - - // Returns true if V is divergent. - bool isDivergent(const Value *V) const { return DivergentValues.count(V); } - // Returns true if V is uniform/non-divergent. - bool isUniform(const Value *V) const { return !isDivergent(V); } - -private: - // Stores all divergent values. - DenseSet DivergentValues; -}; -} // End of anonymous namespace - -// Register this pass. -char DivergenceAnalysis::ID = 0; -INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", - false, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) -INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", - false, true) - namespace { class DivergencePropagator { public: - DivergencePropagator(Function &F, TargetTransformInfo &TTI, - DominatorTree &DT, PostDominatorTree &PDT, - DenseSet &DV) + DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, + PostDominatorTree &PDT, DenseSet &DV) : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} void populateWithSourcesOfDivergence(); void propagate(); @@ -153,7 +65,7 @@ DominatorTree &DT; PostDominatorTree &PDT; std::vector Worklist; // Stack for DFS. - DenseSet &DV; // Stores all divergent values. + DenseSet &DV; // Stores all divergent values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { @@ -286,10 +198,25 @@ } /// end namespace anonymous +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + FunctionPass *llvm::createDivergenceAnalysisPass() { return new DivergenceAnalysis(); } +void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + bool DivergenceAnalysis::runOnFunction(Function &F) { auto *TTIWP = getAnalysisIfAvailable(); if (TTIWP == nullptr)