Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -180,7 +180,7 @@ void initializeLoadStoreVectorizerPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); -void initializeLoopDataPrefetchPass(PassRegistry&); +void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &); void initializeLoopDeletionLegacyPassPass(PassRegistry&); void initializeLoopDistributeLegacyPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); Index: include/llvm/Transforms/Scalar/LoopDataPrefetch.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/LoopDataPrefetch.h @@ -0,0 +1,31 @@ +//===-------- LoopDataPrefetch.h - Loop Data Prefetching Pass ---*- C++ -*-===// +// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for LLVM's Loop Data Prefetching Pass. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H +#define LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// An optimization pass inserting data prefetches in loops. +class LoopDataPrefetchPass : public PassInfoMixin { +public: + LoopDataPrefetchPass() {} + /// \brief Run the pass over the function. 
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} + +#endif Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -93,6 +93,7 @@ #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopDataPrefetch.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -159,6 +159,7 @@ FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) +FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) Index: lib/Transforms/Scalar/LoopDataPrefetch.cpp =================================================================== --- lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/LoopDataPrefetch.h" + #define DEBUG_TYPE "loop-data-prefetch" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" @@ -59,80 +61,89 @@ STATISTIC(NumPrefetches, "Number of prefetches inserted"); -namespace llvm { - void initializeLoopDataPrefetchPass(PassRegistry&); -} - namespace { - class LoopDataPrefetch : public FunctionPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopDataPrefetch() : FunctionPass(ID) { - 
initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry()); - } +/// Loop prefetch implementation class. +class LoopDataPrefetch { +public: + LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, + const TargetTransformInfo *TTI, + OptimizationRemarkEmitter *ORE) + : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {} - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - // FIXME: For some reason, preserving SE here breaks LSR (even if - // this pass changes nothing). - // AU.addPreserved(); - AU.addRequired(); - } + bool run(); - bool runOnFunction(Function &F) override; +private: + bool runOnLoop(Loop *L); - private: - bool runOnLoop(Loop *L); + /// \brief Check if the stride of the accesses is large enough to + /// warrant a prefetch. + bool isStrideLargeEnough(const SCEVAddRecExpr *AR); - /// \brief Check if the the stride of the accesses is large enough to - /// warrant a prefetch. 
- bool isStrideLargeEnough(const SCEVAddRecExpr *AR); + unsigned getMinPrefetchStride() { + if (MinPrefetchStride.getNumOccurrences() > 0) + return MinPrefetchStride; + return TTI->getMinPrefetchStride(); + } - unsigned getMinPrefetchStride() { - if (MinPrefetchStride.getNumOccurrences() > 0) - return MinPrefetchStride; - return TTI->getMinPrefetchStride(); - } + unsigned getPrefetchDistance() { + if (PrefetchDistance.getNumOccurrences() > 0) + return PrefetchDistance; + return TTI->getPrefetchDistance(); + } - unsigned getPrefetchDistance() { - if (PrefetchDistance.getNumOccurrences() > 0) - return PrefetchDistance; - return TTI->getPrefetchDistance(); - } + unsigned getMaxPrefetchIterationsAhead() { + if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0) + return MaxPrefetchIterationsAhead; + return TTI->getMaxPrefetchIterationsAhead(); + } - unsigned getMaxPrefetchIterationsAhead() { - if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0) - return MaxPrefetchIterationsAhead; - return TTI->getMaxPrefetchIterationsAhead(); - } + AssumptionCache *AC; + LoopInfo *LI; + ScalarEvolution *SE; + const TargetTransformInfo *TTI; + OptimizationRemarkEmitter *ORE; +}; + +/// Legacy class for inserting loop data prefetches. +class LoopDataPrefetchLegacyPass : public FunctionPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopDataPrefetchLegacyPass() : FunctionPass(ID) { + initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry()); + } - AssumptionCache *AC; - LoopInfo *LI; - ScalarEvolution *SE; - const TargetTransformInfo *TTI; - const DataLayout *DL; - OptimizationRemarkEmitter *ORE; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + // FIXME: For some reason, preserving SE here breaks LSR (even if + // this pass changes nothing). 
+ // AU.addPreserved(); + AU.addRequired(); + } + + bool runOnFunction(Function &F) override; }; } -char LoopDataPrefetch::ID = 0; -INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch", +char LoopDataPrefetchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch", +INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch", "Loop Data Prefetch", false, false) -FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); } +FunctionPass *llvm::createLoopDataPrefetchPass() { + return new LoopDataPrefetchLegacyPass(); +} bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) { unsigned TargetMinStride = getMinPrefetchStride(); @@ -150,17 +161,46 @@ return TargetMinStride <= AbsStride; } -bool LoopDataPrefetch::runOnFunction(Function &F) { +PreservedAnalyses LoopDataPrefetchPass::run(Function &F, + FunctionAnalysisManager &AM) { + LoopInfo *LI = &AM.getResult(F); + ScalarEvolution *SE = &AM.getResult(F); + AssumptionCache *AC = &AM.getResult(F); + OptimizationRemarkEmitter *ORE = + &AM.getResult(F); + const TargetTransformInfo *TTI = &AM.getResult(F); + + LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE); + bool Changed = LDP.run(); + + if (Changed) { + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + return PA; + } + + return PreservedAnalyses::all(); +} + +bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) return false; - LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis().getSE(); - DL = &F.getParent()->getDataLayout(); - AC = &getAnalysis().getAssumptionCache(F); - ORE = 
&getAnalysis().getORE(); - TTI = &getAnalysis().getTTI(F); + LoopInfo *LI = &getAnalysis().getLoopInfo(); + ScalarEvolution *SE = &getAnalysis().getSE(); + AssumptionCache *AC = + &getAnalysis().getAssumptionCache(F); + OptimizationRemarkEmitter *ORE = + &getAnalysis().getORE(); + const TargetTransformInfo *TTI = + &getAnalysis().getTTI(F); + + LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE); + return LDP.run(); +} +bool LoopDataPrefetch::run() { // If PrefetchDistance is not set, don't run the pass. This gives an // opportunity for targets to run this pass for selected subtargets only // (whose TTI sets PrefetchDistance). Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -50,7 +50,7 @@ initializeIndVarSimplifyLegacyPassPass(Registry); initializeJumpThreadingPass(Registry); initializeLegacyLICMPassPass(Registry); - initializeLoopDataPrefetchPass(Registry); + initializeLoopDataPrefetchLegacyPassPass(Registry); initializeLoopDeletionLegacyPassPass(Registry); initializeLoopAccessLegacyAnalysisPass(Registry); initializeLoopInstSimplifyLegacyPassPass(Registry); Index: test/Transforms/LoopDataPrefetch/PowerPC/basic.ll =================================================================== --- test/Transforms/LoopDataPrefetch/PowerPC/basic.ll +++ test/Transforms/LoopDataPrefetch/PowerPC/basic.ll @@ -1,4 +1,5 @@ ; RUN: opt -mcpu=a2 -loop-data-prefetch -S < %s | FileCheck %s +; RUN: opt -mcpu=a2 -passes=loop-data-prefetch -S < %s | FileCheck %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-bgq-linux"