Add createSpeculativeExecutionIfHasBranchDivergencePass().

The new factory function and the hidden -spec-exec-only-if-divergent-target
option make SpeculativeExecution::runOnFunction return false, without touching
the function, when TargetTransformInfo::hasBranchDivergence() is false.

Index: llvm/trunk/include/llvm/LinkAllPasses.h
===================================================================
--- llvm/trunk/include/llvm/LinkAllPasses.h
+++ llvm/trunk/include/llvm/LinkAllPasses.h
@@ -186,6 +186,7 @@
       (void) llvm::createScalarizerPass();
       (void) llvm::createSeparateConstOffsetFromGEPPass();
       (void) llvm::createSpeculativeExecutionPass();
+      (void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
       (void) llvm::createRewriteSymbolsPass();
       (void) llvm::createStraightLineStrengthReducePass();
       (void) llvm::createMemDerefPrinter();
Index: llvm/trunk/include/llvm/Transforms/Scalar.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar.h
+++ llvm/trunk/include/llvm/Transforms/Scalar.h
@@ -430,6 +430,10 @@
 //
 FunctionPass *createSpeculativeExecutionPass();
 
+// Same as createSpeculativeExecutionPass, but does nothing unless
+// TargetTransformInfo::hasBranchDivergence() is true.
+FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass();
+
 //===----------------------------------------------------------------------===//
 //
 // LoadCombine - Combine loads into bigger loads.
Index: llvm/trunk/lib/Transforms/Scalar/SpeculativeExecution.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ llvm/trunk/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -50,6 +50,15 @@
 // aggressive speculation while counting on later passes to either capitalize on
 // that or clean it up.
 //
+// If the pass was created by calling
+// createSpeculativeExecutionIfHasBranchDivergencePass or the
+// -spec-exec-only-if-divergent-target option is present, this pass only has an
+// effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
+// on other targets, it is a nop.
+//
+// This lets you include this pass unconditionally in the IR pass pipeline, but
+// only enable it for relevant targets.
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallSet.h"
@@ -83,19 +92,39 @@
              "number of instructions that would not be speculatively executed "
              "exceeds this limit."));
 
+static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
+    "spec-exec-only-if-divergent-target", cl::init(false), cl::Hidden,
+    cl::desc("Speculative execution is applied only to targets with divergent "
+             "branches, even if the pass was configured to apply to all "
+             "targets."));
+
 namespace {
+
 class SpeculativeExecution : public FunctionPass {
  public:
-   static char ID;
-   SpeculativeExecution(): FunctionPass(ID) {}
-
-   void getAnalysisUsage(AnalysisUsage &AU) const override;
-   bool runOnFunction(Function &F) override;
+  static char ID;
+  explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false)
+      : FunctionPass(ID),
+        OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
+                              SpecExecOnlyIfDivergentTarget) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnFunction(Function &F) override;
+
+  const char *getPassName() const override {
+    if (OnlyIfDivergentTarget)
+      return "Speculatively execute instructions if target has divergent "
+             "branches";
+    return "Speculatively execute instructions";
+  }
 
  private:
   bool runOnBasicBlock(BasicBlock &B);
   bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
 
+  // If true, this pass is a nop unless the target architecture has branch
+  // divergence.
+  const bool OnlyIfDivergentTarget;
   const TargetTransformInfo *TTI = nullptr;
 };
 } // namespace
@@ -105,7 +134,7 @@
                       "Speculatively execute instructions", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
-                      "Speculatively execute instructions", false, false)
+                    "Speculatively execute instructions", false, false)
 
 void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -116,6 +145,11 @@
     return false;
 
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
+    DEBUG(dbgs() << "Not running SpeculativeExecution because "
+                    "TTI->hasBranchDivergence() is false.\n");
+    return false;
+  }
 
   bool Changed = false;
   for (auto& B : F) {
@@ -240,4 +274,8 @@
   return new SpeculativeExecution();
 }
 
+FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+  return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true);
+}
+
 } // namespace llvm
Index: llvm/trunk/test/Transforms/SpeculativeExecution/divergent-target.ll
===================================================================
--- llvm/trunk/test/Transforms/SpeculativeExecution/divergent-target.ll
+++ llvm/trunk/test/Transforms/SpeculativeExecution/divergent-target.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -speculative-execution | \
+; RUN:   FileCheck --check-prefix=ON %s
+; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -speculative-execution \
+; RUN:   -spec-exec-only-if-divergent-target | \
+; RUN:   FileCheck --check-prefix=ON %s
+; RUN: opt < %s -S -march=x86_64 -speculative-execution \
+; RUN:   -spec-exec-only-if-divergent-target | \
+; RUN:   FileCheck --check-prefix=OFF %s
+
+; Hoist in if-then pattern.
+define void @f() {
+; ON: %x = add i32 2, 3
+; ON: br i1 true
+; OFF: br i1 true
+; OFF: %x = add i32 2, 3
+  br i1 true, label %a, label %b
+a:
+  %x = add i32 2, 3
+  br label %b
+b:
+  ret void
+}