Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -313,7 +313,7 @@ void initializeSinkingLegacyPassPass(PassRegistry&); void initializeSjLjEHPreparePass(PassRegistry&); void initializeSlotIndexesPass(PassRegistry&); -void initializeSpeculativeExecutionPass(PassRegistry&); +void initializeSpeculativeExecutionLegacyPassPass(PassRegistry&); void initializeSpillPlacementPass(PassRegistry&); void initializeStackColoringPass(PassRegistry&); void initializeStackMapLivenessPass(PassRegistry&); Index: llvm/trunk/include/llvm/Transforms/Scalar/SpeculativeExecution.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/SpeculativeExecution.h +++ llvm/trunk/include/llvm/Transforms/Scalar/SpeculativeExecution.h @@ -0,0 +1,92 @@ +//===- SpeculativeExecution.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass hoists instructions to enable speculative execution on +// targets where branches are expensive. This is aimed at GPUs. It +// currently works on simple if-then and if-then-else +// patterns. +// +// Removing branches is not the only motivation for this +// pass. E.g. 
consider this code and assume that there is no +// addressing mode for multiplying by sizeof(*a): +// +// if (b > 0) +// c = a[i + 1] +// if (d > 0) +// e = a[i + 2] +// +// turns into +// +// p = &a[i + 1]; +// if (b > 0) +// c = *p; +// q = &a[i + 2]; +// if (d > 0) +// e = *q; +// +// which could later be optimized to +// +// r = &a[i]; +// if (b > 0) +// c = r[1]; +// if (d > 0) +// e = r[2]; +// +// Later passes sink back much of the speculated code that did not enable +// further optimization. +// +// This pass is more aggressive than the function SpeculativelyExecuteBB in +// SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and +// it will speculate at most one instruction. It also will not speculate if +// there is a value defined in the if-block that is only used in the then-block. +// These restrictions make sense since the speculation in SimplifyCFG seems +// aimed at introducing cheap selects, while this pass is intended to do more +// aggressive speculation while counting on later passes to either capitalize on +// that or clean it up. +// +// If the pass was created by calling +// createSpeculativeExecutionIfHasBranchDivergencePass or the +// -spec-exec-only-if-divergent-target option is present, this pass only has an +// effect on targets where TargetTransformInfo::hasBranchDivergence() is true; +// on other targets, it is a nop. +// +// This lets you include this pass unconditionally in the IR pass pipeline, but +// only enable it for relevant targets. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H +#define LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class SpeculativeExecutionPass + : public PassInfoMixin<SpeculativeExecutionPass> { +public: + SpeculativeExecutionPass(bool OnlyIfDivergentTarget = false); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM + bool runImpl(Function &F, TargetTransformInfo *TTI); + +private: + bool runOnBasicBlock(BasicBlock &B); + bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock); + + // If true, this pass is a nop unless the target architecture has branch + // divergence. + const bool OnlyIfDivergentTarget = false; + + TargetTransformInfo *TTI = nullptr; +}; +} + +#endif //LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -112,6 +112,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/BreakCriticalEdges.h" Index: llvm/trunk/lib/Passes/PassRegistry.def =================================================================== --- llvm/trunk/lib/Passes/PassRegistry.def +++ llvm/trunk/lib/Passes/PassRegistry.def @@ -175,6 +175,7 @@ FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) +FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) FUNCTION_PASS("sroa", SROA()) 
FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -80,7 +80,7 @@ initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); - initializeSpeculativeExecutionPass(Registry); + initializeSpeculativeExecutionLegacyPassPass(Registry); initializeStraightLineStrengthReducePass(Registry); initializeLoadCombinePass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); Index: llvm/trunk/lib/Transforms/Scalar/SpeculativeExecution.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ llvm/trunk/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -61,9 +61,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -101,58 +101,62 @@ namespace { -class SpeculativeExecution : public FunctionPass { - public: - static char ID; - explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false) - : FunctionPass(ID), - OnlyIfDivergentTarget(OnlyIfDivergentTarget || - SpecExecOnlyIfDivergentTarget) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; - - const char *getPassName() const override { - if (OnlyIfDivergentTarget) - return "Speculatively execute instructions if target has divergent " - "branches"; - return "Speculatively execute instructions"; - } - - private: - bool 
runOnBasicBlock(BasicBlock &B); - bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock); +class SpeculativeExecutionLegacyPass : public FunctionPass { +public: + static char ID; + explicit SpeculativeExecutionLegacyPass(bool OnlyIfDivergentTarget = false) + : FunctionPass(ID), OnlyIfDivergentTarget(OnlyIfDivergentTarget || + SpecExecOnlyIfDivergentTarget), + Impl(OnlyIfDivergentTarget) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { + if (OnlyIfDivergentTarget) + return "Speculatively execute instructions if target has divergent " + "branches"; + return "Speculatively execute instructions"; + } - // If true, this pass is a nop unless the target architecture has branch - // divergence. +private: + // Variable preserved purely for correct name printing. const bool OnlyIfDivergentTarget; - const TargetTransformInfo *TTI = nullptr; + + SpeculativeExecutionPass Impl; }; } // namespace -char SpeculativeExecution::ID = 0; -INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution", +char SpeculativeExecutionLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SpeculativeExecutionLegacyPass, "speculative-execution", "Speculatively execute instructions", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution", +INITIALIZE_PASS_END(SpeculativeExecutionLegacyPass, "speculative-execution", "Speculatively execute instructions", false, false) -void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const { +void SpeculativeExecutionLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); } -bool SpeculativeExecution::runOnFunction(Function &F) { +bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + return 
Impl.runImpl(F, TTI); +} + +namespace llvm { + +bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) { if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) { DEBUG(dbgs() << "Not running SpeculativeExecution because " "TTI->hasBranchDivergence() is false.\n"); return false; } + this->TTI = TTI; bool Changed = false; for (auto& B : F) { Changed |= runOnBasicBlock(B); @@ -160,7 +164,7 @@ return Changed; } -bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) { +bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) { BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator()); if (BI == nullptr) return false; @@ -227,8 +231,8 @@ } } -bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock, - BasicBlock &ToBlock) { +bool SpeculativeExecutionPass::considerHoistingFromTo( + BasicBlock &FromBlock, BasicBlock &ToBlock) { SmallSet<const Instruction *, 8> NotHoisted; const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) { for (Value* V : U->operand_values()) { @@ -270,14 +274,28 @@ return true; } -namespace llvm { - FunctionPass *createSpeculativeExecutionPass() { - return new SpeculativeExecution(); + return new SpeculativeExecutionLegacyPass(); } FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() { - return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true); + return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true); } +SpeculativeExecutionPass::SpeculativeExecutionPass(bool OnlyIfDivergentTarget) + : OnlyIfDivergentTarget(OnlyIfDivergentTarget || + SpecExecOnlyIfDivergentTarget) {} + +PreservedAnalyses SpeculativeExecutionPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto *TTI = &AM.getResult<TargetIRAnalysis>(F); + + bool Changed = runImpl(F, TTI); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<GlobalsAA>(); + return PA; +} } // namespace llvm Index: llvm/trunk/test/Transforms/SpeculativeExecution/spec.ll 
=================================================================== --- llvm/trunk/test/Transforms/SpeculativeExecution/spec.ll +++ llvm/trunk/test/Transforms/SpeculativeExecution/spec.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -S -speculative-execution \ ; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \ ; RUN: | FileCheck %s +; RUN: opt < %s -S -passes='speculative-execution' \ +; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \ +; RUN: | FileCheck %s target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"