Index: llvm/include/llvm/Transforms/Utils/LoopPeel.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -0,0 +1,40 @@ +//===- llvm/Transforms/Utils/LoopPeel.h ----- Peeling utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some loop peeling utilities. It does not define any +// actual pass or policy. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_LOOPPEEL_H +#define LLVM_TRANSFORMS_UTILS_LOOPPEEL_H + +#include "llvm/Analysis/TargetTransformInfo.h" + +namespace llvm { + +bool canPeel(Loop *L); + +bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); + +TargetTransformInfo::PeelingPreferences +gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, + bool UnrollingSpecficValues = false); + +void computePeelCount(Loop *L, unsigned LoopSize, + TargetTransformInfo::PeelingPreferences &PP, + unsigned &TripCount, ScalarEvolution &SE, + unsigned Threshold = UINT_MAX); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_LOOPPEEL_H Index: llvm/include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -92,16 +92,6 @@ const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop = nullptr); -void computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, - TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE); - -bool canPeel(Loop *L); - -bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); - LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, @@ -121,7 +111,6 @@ unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, - bool &UseUpperBound); void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, @@ -138,12 +127,6 @@ Optional UserAllowPartial, Optional UserRuntime, Optional UserUpperBound, Optional UserFullUnrollMaxCount); -TargetTransformInfo::PeelingPreferences -gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - Optional UserAllowPeeling, - Optional UserAllowProfileBasedPeeling); - unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, Index: llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; Index: llvm/lib/Transforms/Scalar/LoopFuse.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -66,7 +66,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeMoverUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/LoopPeel.h" using namespace llvm; Index: llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -56,6 +56,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -115,10 +116,6 @@ cl::desc( "Set the max unroll count for full unrolling, for testing purposes")); -static cl::opt UnrollPeelCount( - "unroll-peel-count", cl::Hidden, - cl::desc("Set the unroll peeling count, for testing purposes")); - static cl::opt UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " @@ -149,15 +146,6 @@ "threshold, the loop is considered as flat and will be less " "aggressively unrolled.")); -static cl::opt - UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, - cl::desc("Allows loops to be peeled when the dynamic " - "trip count is known to be low.")); - -static cl::opt UnrollAllowLoopNestsPeeling( - "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, - cl::desc("Allows loop nests to be peeled.")); - static cl::opt UnrollUnrollRemainder( "unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled.")); @@ -275,39 +263,6 @@ return UP; } -TargetTransformInfo::PeelingPreferences -llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - Optional UserAllowPeeling, - Optional UserAllowProfileBasedPeeling) { - TargetTransformInfo::PeelingPreferences PP; - - // Default values - PP.PeelCount = 0; - PP.AllowPeeling = true; - PP.AllowLoopNestsPeeling = false; - PP.PeelProfiledIterations = true; - - // Get Target Specifc Values - TTI.getPeelingPreferences(L, SE, PP); - - // User Specified Values using cl::opt - if (UnrollPeelCount.getNumOccurrences() > 0) - PP.PeelCount = UnrollPeelCount; - if (UnrollAllowPeeling.getNumOccurrences() > 0) - PP.AllowPeeling = UnrollAllowPeeling; - if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) - PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; - - // User Specifed values provided by argument - if (UserAllowPeeling.hasValue()) - PP.AllowPeeling = *UserAllowPeeling; - if (UserAllowProfileBasedPeeling.hasValue()) - PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; - - return PP; -} - namespace { /// A struct to densely store the state of an instruction after unrolling at @@ -881,7 +836,7 @@ } // 4th priority is loop peeling. - computePeelCount(L, LoopSize, UP, PP, TripCount, SE); + computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; @@ -1087,7 +1042,7 @@ ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedFullUnrollMaxCount); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( - L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling); + L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -35,11 +35,11 @@ LCSSA.cpp LibCallsShrinkWrap.cpp Local.cpp + LoopPeel.cpp LoopRotationUtils.cpp LoopSimplify.cpp LoopUnroll.cpp LoopUnrollAndJam.cpp - LoopUnrollPeel.cpp LoopUnrollRuntime.cpp LoopUtils.cpp LoopVersioning.cpp Index: llvm/lib/Transforms/Utils/LoopPeel.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopPeel.cpp +++ llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -1,4 +1,4 @@ -//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// +//===- LoopPeel.cpp -------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,53 +6,65 @@ // //===----------------------------------------------------------------------===// // -// This file implements some loop unrolling utilities for peeling loops -// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for -// unrolling loops with compile-time constant trip counts. -// +// Loop Peel Utilities. //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/LoopUnrollAnalyzer.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include -#include -#include -#include using namespace llvm; using namespace llvm::PatternMatch; -#define DEBUG_TYPE "loop-unroll" +#define DEBUG_TYPE "loop-peel" STATISTIC(NumPeeled, "Number of loops peeled"); +static cl::opt UnrollPeelCount( + "unroll-peel-count", cl::Hidden, + cl::desc("Set the unroll peeling count, for testing purposes")); + +static cl::opt + UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, + cl::desc("Allows loops to be peeled when the dynamic " + "trip count is known to be low.")); + +static cl::opt + UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", + cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt UnrollPeelMaxCount( "unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling.")); @@ -278,9 +290,9 @@ // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE) { + unsigned &TripCount, ScalarEvolution &SE, + unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. @@ -322,7 +334,7 @@ // maximum number of iterations among these values, thus turning all those // Phis into invariants. // First, check that we can peel at least one iteration. - if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { + if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) { // Store the pre-calculated values here. SmallDenseMap IterationsToInvariance; // Now go through all Phis to calculate their the number of iterations they @@ -342,7 +354,7 @@ // Pay respect to limitations implied by loop size and the max peel count. unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); DesiredPeelCount = std::max(DesiredPeelCount, countToEliminateCompares(*L, MaxPeelCount, SE)); @@ -385,7 +397,7 @@ if (*PeelCount) { if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && - (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { + (LoopSize * (*PeelCount + 1) <= Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); PP.PeelCount = *PeelCount; @@ -396,7 +408,7 @@ LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); - LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n"); } } } @@ -491,7 +503,7 @@ /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, - SmallVectorImpl > &ExitEdges, + SmallVectorImpl> &ExitEdges, SmallVectorImpl &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI) { @@ -599,6 +611,40 @@ LVMap[KV.first] = KV.second; } +TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) { + TargetTransformInfo::PeelingPreferences PP; + + // Default values + PP.PeelCount = 0; + PP.AllowPeeling = true; + PP.AllowLoopNestsPeeling = false; + PP.PeelProfiledIterations = true; + + // Get Target Specifc Values + TTI.getPeelingPreferences(L, SE, PP); + + if (UnrollingSpecficValues) { + // User Specified Values using cl::opt + if (UnrollPeelCount.getNumOccurrences() > 0) + PP.PeelCount = UnrollPeelCount; + if (UnrollAllowPeeling.getNumOccurrences() > 0) + PP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; + } + + // User Specifed values provided by argument + if (UserAllowPeeling.hasValue()) + PP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + + return PP; +} + /// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. @@ -609,8 +655,8 @@ /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA) { + ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,6 +59,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h"