Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -177,7 +177,7 @@ void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&); -void initializeLoopDistributePass(PassRegistry&); +void initializeLoopDistributeLegacyPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); Index: llvm/trunk/include/llvm/Transforms/Scalar/LoopDistribute.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/LoopDistribute.h +++ llvm/trunk/include/llvm/Transforms/Scalar/LoopDistribute.h @@ -0,0 +1,30 @@ +//===- LoopDistribute.cpp - Loop Distribution Pass --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Loop Distribution Pass. Its main focus is to +// distribute loops that cannot be vectorized due to dependence cycles. It +// tries to isolate the offending dependences into a new loop allowing +// vectorization of the remaining parts. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H +#define LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class LoopDistributePass : public PassInfoMixin<LoopDistributePass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -92,6 +92,7 @@ #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopRotation.h" Index: llvm/trunk/lib/Passes/PassRegistry.def =================================================================== --- llvm/trunk/lib/Passes/PassRegistry.def +++ llvm/trunk/lib/Passes/PassRegistry.def @@ -151,6 +151,7 @@ FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) +FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs())) Index: llvm/trunk/lib/Transforms/Scalar/LoopDistribute.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopDistribute.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopDistribute.cpp @@ -22,6 +22,7 @@ // //===----------------------------------------------------------------------===// +#include 
"llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/STLExtras.h" @@ -29,6 +30,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPassManager.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" @@ -597,7 +599,7 @@ } /// \brief Try to distribute an inner-most loop. - bool processLoop(LoopAccessLegacyAnalysis *LAA) { + bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) { assert(L->empty() && "Only process inner loops."); DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName() @@ -610,7 +612,7 @@ return fail("multiple exit blocks"); // LAA will check that we only have a single exiting block. - LAI = &LAA->getInfo(L); + LAI = &GetLAA(*L); // Currently, we only distribute to isolate the part of the loop with // dependence cycles to enable partial vectorization. @@ -860,19 +862,50 @@ Optional<bool> IsForced; }; +/// Shared implementation between new and old PMs. +static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, + ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, + std::function<const LoopAccessInfo &(Loop &)> &GetLAA, + bool ProcessAllLoops) { + // Build up a worklist of inner-loops to vectorize. This is necessary as the + // act of distributing a loop creates new loops and can invalidate iterators + // across the loops. + SmallVector<Loop *, 8> Worklist; + + for (Loop *TopLevelLoop : *LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->empty()) + Worklist.push_back(L); + + // Now walk the identified inner loops. 
+ bool Changed = false; + for (Loop *L : Worklist) { + LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE); + + // If distribution was forced for the specific loop to be + // enabled/disabled, follow that. Otherwise use the global flag. 
+ if (LDL.isForced().getValueOr(ProcessAllLoops)) + Changed |= LDL.processLoop(GetLAA); + } + + // Process each loop nest in the function. + return Changed; +} + /// \brief The pass class. -class LoopDistribute : public FunctionPass { +class LoopDistributeLegacy : public FunctionPass { public: /// \p ProcessAllLoopsByDefault specifies whether loop distribution should be /// performed by default. Pass -enable-loop-distribute={0,1} overrides this /// default. We use this to keep LoopDistribution off by default when invoked /// from the optimization pipeline but on when invoked explicitly from opt. - LoopDistribute(bool ProcessAllLoopsByDefault = true) + LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true) : FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) { // The default is set by the caller. if (EnableLoopDistribute.getNumOccurrences() > 0) ProcessAllLoops = EnableLoopDistribute; - initializeLoopDistributePass(*PassRegistry::getPassRegistry()); + initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { @@ -884,31 +917,10 @@ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + std::function<const LoopAccessInfo &(Loop &)> GetLAA = + [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; - // Build up a worklist of inner-loops to vectorize. This is necessary as the - // act of distributing a loop creates new loops and can invalidate iterators - // across the loops. - SmallVector<Loop *, 8> Worklist; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) - // We only handle inner-most loops. - if (L->empty()) - Worklist.push_back(L); - - // Now walk the identified inner loops. 
- bool Changed = false; - for (Loop *L : Worklist) { - LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE); - - // If distribution was forced for the specific loop to be - // enabled/disabled, follow that. Otherwise use the global flag. 
- if (LDL.isForced().getValueOr(ProcessAllLoops)) - Changed |= LDL.processLoop(LAA); - } - - // Process each loop nest in the function. - return Changed; + return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -930,19 +942,49 @@ }; } // anonymous namespace -char LoopDistribute::ID; +PreservedAnalyses LoopDistributePass::run(Function &F, + FunctionAnalysisManager &AM) { + // FIXME: This does not currently match the behavior from the old PM. + // ProcessAllLoops with the old PM defaults to true when invoked from opt and + // false when invoked from the optimization pipeline. + bool ProcessAllLoops = false; + if (EnableLoopDistribute.getNumOccurrences() > 0) + ProcessAllLoops = EnableLoopDistribute; + + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + std::function<const LoopAccessInfo &(Loop &)> GetLAA = + [&](Loop &L) -> const LoopAccessInfo & { + return LAM.getResult<LoopAccessAnalysis>(L); + }; + + bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<LoopAnalysis>(); + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} + +char LoopDistributeLegacy::ID; static const char ldist_name[] = "Loop Distribition"; -INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false) +INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, + false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 
-INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false) +INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) namespace llvm { FunctionPass *createLoopDistributePass(bool 
ProcessAllLoopsByDefault) { - return new LoopDistribute(ProcessAllLoopsByDefault); + return new LoopDistributeLegacy(ProcessAllLoopsByDefault); } } Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -86,7 +86,7 @@ initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); initializeFloat2IntLegacyPassPass(Registry); - initializeLoopDistributePass(Registry); + initializeLoopDistributeLegacyPass(Registry); initializeLoopLoadEliminationPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); initializeLoopVersioningPassPass(Registry); Index: llvm/trunk/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll =================================================================== --- llvm/trunk/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll +++ llvm/trunk/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll @@ -5,6 +5,11 @@ ; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \ ; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS +; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS +; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS + ; REQUIRES: asserts ; HOTNESS: block-frequency: forced Index: llvm/trunk/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll =================================================================== --- llvm/trunk/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll +++ llvm/trunk/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll @@ -3,6 +3,11 @@ ; RUN: opt 
-loop-distribute -S 
-pass-remarks-missed=loop-distribute \ ; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS +; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS +; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS + ; REQUIRES: asserts ; This is the input program: