Index: include/llvm/Analysis/LoopVectorizationLegality.h =================================================================== --- include/llvm/Analysis/LoopVectorizationLegality.h +++ include/llvm/Analysis/LoopVectorizationLegality.h @@ -0,0 +1,455 @@ +//===- llvm/Analysis/LoopVectorizationLegality.h ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file defines the LoopVectorizationLegality class. Original code +/// in Loop Vectorizer has been moved to Analysis tree for modularity +/// and reusability. +/// +/// Currently, it works for innermost loop vectorization. Extending this to +/// outer loop vectorization is a TODO item. +/// +/// Also provides: +/// LoopVectorizeHints class which keeps a number of loop annotations +/// locally for easy look up. It has the ability to write them back as +/// loop metadata, upon request. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOPVECTORIZATIONLEGALITY_H +#define LLVM_ANALYSIS_LOOPVECTORIZATIONLEGALITY_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/Utils/LoopUtils.h" + +namespace llvm { + +// class TargetLibraryInfo; +// class AliasAnalysis; + +/// Create an analysis remark that explains why vectorization failed +/// +/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p +/// RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents vectorization. Otherwise \p TheLoop is used for +/// the location of the remark. \return the remark object that can be +/// streamed to. +OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, + StringRef RemarkName, + Loop *TheLoop, + Instruction *I = nullptr); + +/// Utility class for getting and setting loop vectorizer hints in the form +/// of loop metadata. +/// This class keeps a number of loop annotations locally (as member variables) +/// and can, upon request, write them back as metadata on the loop. It will +/// initially scan the loop for existing metadata, and will update the local +/// values based on information in the loop. +/// We cannot write all values to metadata, as the mere presence of some info, +/// for example 'force', means a decision has been made. So, we need to be +/// careful NOT to add them if the user hasn't specifically asked so. +class LoopVectorizeHints { + enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; + + /// Hint - associates name and validation with the hint value. + struct Hint { + const char *Name; + unsigned Value; // This may have to change for non-numeric values. + HintKind Kind; + + Hint(const char *Name, unsigned Value, HintKind Kind) + : Name(Name), Value(Value), Kind(Kind) {} + + bool validate(unsigned Val); + }; + + /// Vectorization width. + Hint Width; + + /// Vectorization interleave factor. + Hint Interleave; + + /// Vectorization forced + Hint Force; + + /// Already Vectorized + Hint IsVectorized; + + /// Return the loop metadata prefix. + static StringRef Prefix() { return "llvm.loop."; } + + /// True if there is any unsafe math in the loop. + bool PotentiallyUnsafe = false; + +public: + enum ForceKind { + FK_Undefined = -1, ///< Not selected. + FK_Disabled = 0, ///< Forcing disabled. + FK_Enabled = 1, ///< Forcing enabled. + }; + + LoopVectorizeHints(const Loop *L, bool DisableInterleaving, + OptimizationRemarkEmitter &ORE); + + /// Mark the loop L as already vectorized by setting the width to 1. + void setAlreadyVectorized() { + IsVectorized.Value = 1; + Hint Hints[] = {IsVectorized}; + writeHintsToMetadata(Hints); + } + + bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const; + + /// Dumps all the hint information. + void emitRemarkWithHints() const; + + unsigned getWidth() const { return Width.Value; } + unsigned getInterleave() const { return Interleave.Value; } + unsigned getIsVectorized() const { return IsVectorized.Value; } + enum ForceKind getForce() const { return (ForceKind)Force.Value; } + + /// \brief If hints are provided that force vectorization, use the AlwaysPrint + /// pass name to force the frontend to print the diagnostic. + const char *vectorizeAnalysisPassName() const; + + bool allowReordering() const { + // When enabling loop hints are provided we allow the vectorizer to change + // the order of operations that is given by the scalar loop. This is not + // enabled by default because can be unsafe or inefficient. For example, + // reordering floating-point operations will change the way round-off + // error accumulates in the loop. + return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; + } + + bool isPotentiallyUnsafe() const { + // Avoid FP vectorization if the target is unsure about proper support. + // This may be related to the SIMD unit in the target not handling + // IEEE 754 FP ops properly, or bad single-to-double promotions. + // Otherwise, a sequence of vectorized loops, even without reduction, + // could lead to different end results on the destination vectors. + return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; + } + + void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } + +private: + /// Find hints specified in the loop metadata and update local values. + void getHintsFromMetadata(); + + /// Checks string hint with one operand and set value if valid. + void setHint(StringRef Name, Metadata *Arg); + + /// Create a new hint from name / value pair. + MDNode *createHintMetadata(StringRef Name, unsigned V) const; + + /// Matches metadata with hint name. + bool matchesHintMetadataName(MDNode *Node, ArrayRef HintTypes); + + /// Sets current hints into loop metadata, keeping other values intact. + void writeHintsToMetadata(ArrayRef HintTypes); + + /// The loop these hints belong to. + const Loop *TheLoop; + + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter &ORE; +}; + +/// \brief This holds vectorization requirements that must be verified late in +/// the process. The requirements are set by legalize and costmodel. Once +/// vectorization has been determined to be possible and profitable the +/// requirements can be verified by looking for metadata or compiler options. +/// For example, some loops require FP commutativity which is only allowed if +/// vectorization is explicitly specified or if the fast-math compiler option +/// has been provided. +/// Late evaluation of these requirements allows helpful diagnostics to be +/// composed that tells the user what need to be done to vectorize the loop. For +/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late +/// evaluation should be used only when diagnostics can generated that can be +/// followed by a non-expert user. +class LoopVectorizationRequirements { +public: + LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} + + void addUnsafeAlgebraInst(Instruction *I) { + // First unsafe algebra instruction. + if (!UnsafeAlgebraInst) + UnsafeAlgebraInst = I; + } + + void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } + + bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints); + +private: + unsigned NumRuntimePointerChecks = 0; + Instruction *UnsafeAlgebraInst = nullptr; + + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter &ORE; +}; + +/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and +/// to what vectorization factor. +/// This class does not look at the profitability of vectorization, only the +/// legality. This class has two main kinds of checks: +/// * Memory checks - The code in canVectorizeMemory checks if vectorization +/// will change the order of memory accesses in a way that will change the +/// correctness of the program. +/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory +/// checks for a number of different conditions, such as the availability of a +/// single induction variable, that all types are supported and vectorize-able, +/// etc. This code reflects the capabilities of InnerLoopVectorizer. +/// This class is also used by InnerLoopVectorizer for identifying +/// induction variable and the different reduction variables. +class LoopVectorizationLegality { +public: + LoopVectorizationLegality( + Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, + TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, + std::function *GetLAA, LoopInfo *LI, + OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, + LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) + : TheLoop(L), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA), ORE(ORE), + Requirements(R), Hints(H), DB(DB), AC(AC) {} + + /// ReductionList contains the reduction descriptors for all + /// of the reductions that were found in the loop. + using ReductionList = DenseMap; + + /// InductionList saves induction variables and maps them to the + /// induction descriptor. + using InductionList = MapVector; + + /// RecurrenceSet contains the phi nodes that are recurrences other than + /// inductions and reductions. + using RecurrenceSet = SmallPtrSet; + + /// Returns true if it is legal to vectorize this loop. + /// This does not mean that it is profitable to vectorize this + /// loop, only that it is legal to do so. + bool canVectorize(); + + /// Returns the primary induction variable. + PHINode *getPrimaryInduction() { return PrimaryInduction; } + + /// Returns the reduction variables found in the loop. + ReductionList *getReductionVars() { return &Reductions; } + + /// Returns the induction variables found in the loop. + InductionList *getInductionVars() { return &Inductions; } + + /// Return the first-order recurrences found in the loop. + RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } + + /// Return the set of instructions to sink to handle first-order recurrences. + DenseMap &getSinkAfter() { return SinkAfter; } + + /// Returns the widest induction type. + Type *getWidestInductionType() { return WidestIndTy; } + + /// Returns True if V is a Phi node of an induction variable in this loop. + bool isInductionPhi(const Value *V); + + /// Returns True if V is a cast that is part of an induction def-use chain, + /// and had been proven to be redundant under a runtime guard (in other + /// words, the cast has the same SCEV expression as the induction phi). + bool isCastedInductionVariable(const Value *V); + + /// Returns True if V can be considered as an induction variable in this + /// loop. V can be the induction phi, or some redundant cast in the def-use + /// chain of the inducion phi. + bool isInductionVariable(const Value *V); + + /// Returns True if PN is a reduction variable in this loop. + bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } + + /// Returns True if Phi is a first-order recurrence in this loop. + bool isFirstOrderRecurrence(const PHINode *Phi); + + /// Return true if the block BB needs to be predicated in order for the loop + /// to be vectorized. + bool blockNeedsPredication(BasicBlock *BB); + + /// Check if this pointer is consecutive when vectorizing. This happens + /// when the last index of the GEP is the induction variable, or that the + /// pointer itself is an induction variable. + /// This check allows us to vectorize A[idx] into a wide load/store. + /// Returns: + /// 0 - Stride is unknown or non-consecutive. + /// 1 - Address is consecutive. + /// -1 - Address is consecutive, and decreasing. + /// NOTE: This method must only be used before modifying the original scalar + /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). + int isConsecutivePtr(Value *Ptr); + + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V); + + /// Returns the information that we collected about runtime memory check. + const RuntimePointerChecking *getRuntimePointerChecking() const { + return LAI->getRuntimePointerChecking(); + } + + const LoopAccessInfo *getLAI() const { return LAI; } + + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } + + uint64_t getMaxSafeRegisterWidth() const { + return LAI->getDepChecker().getMaxSafeRegisterWidth(); + } + + bool hasStride(Value *V) { return LAI->hasStride(V); } + + /// Returns true if vector representation of the instruction \p I + /// requires mask. + bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } + + unsigned getNumStores() const { return LAI->getNumStores(); } + unsigned getNumLoads() const { return LAI->getNumLoads(); } + + // Returns true if the NoNaN attribute is set on the function. + bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } + +private: + /// Check if a single basic block loop is vectorizable. + /// At this point we know that this is a loop with a constant trip count + /// and we only need to check individual instructions. + bool canVectorizeInstrs(); + + /// When we vectorize loops we may change the order in which + /// we read and write from memory. This method checks if it is + /// legal to vectorize the code, considering only memory constrains. + /// Returns true if the loop is vectorizable + bool canVectorizeMemory(); + + /// Return true if we can vectorize this loop using the IF-conversion + /// transformation. + bool canVectorizeWithIfConvert(); + + /// Return true if all of the instructions in the block can be speculatively + /// executed. \p SafePtrs is a list of addresses that are known to be legal + /// and we know that we can read from them without segfault. + bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); + + /// Updates the vectorization state by adding \p Phi to the inductions list. + /// This can set \p Phi as the main induction of the loop if \p Phi is a + /// better choice for the main induction than the existing one. + void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, + SmallPtrSetImpl &AllowedExit); + + /// Create an analysis remark that explains why vectorization failed + /// + /// \p RemarkName is the identifier for the remark. If \p I is passed it is + /// an instruction that prevents vectorization. Otherwise the loop is used + /// for the location of the remark. \return the remark object that can be + /// streamed to. + OptimizationRemarkAnalysis + createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const { + return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), + RemarkName, TheLoop, I); + } + + /// \brief If an access has a symbolic strides, this maps the pointer value to + /// the stride symbol. + const ValueToValueMap *getSymbolicStrides() { + // FIXME: Currently, the set of symbolic strides is sometimes queried before + // it's collected. This happens from canVectorizeWithIfConvert, when the + // pointer is checked to reference consecutive elements suitable for a + // masked access. + return LAI ? &LAI->getSymbolicStrides() : nullptr; + } + + /// The loop that we evaluate. + Loop *TheLoop; + + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. + /// Applies dynamic knowledge to simplify SCEV expressions in the context + /// of existing SCEV assumptions. The analysis will also add a minimal set + /// of new predicates if this is required to enable vectorization and + /// unrolling. + PredicatedScalarEvolution &PSE; + + /// Target Library Info. + TargetLibraryInfo *TLI; + + /// Dominator Tree. + DominatorTree *DT; + + // LoopAccess analysis. + std::function *GetLAA; + + // And the loop-accesses info corresponding to this loop. This pointer is + // null until canVectorizeMemory sets it up. + const LoopAccessInfo *LAI = nullptr; + + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter *ORE; + + // --- vectorization state --- // + + /// Holds the primary induction variable. This is the counter of the + /// loop. + PHINode *PrimaryInduction = nullptr; + + /// Holds the reduction variables. + ReductionList Reductions; + + /// Holds all of the induction variables that we found in the loop. + /// Notice that inductions don't need to start at zero and that induction + /// variables can be pointers. + InductionList Inductions; + + /// Holds all the casts that participate in the update chain of the induction + /// variables, and that have been proven to be redundant (possibly under a + /// runtime guard). These casts can be ignored when creating the vectorized + /// loop body. + SmallPtrSet InductionCastsToIgnore; + + /// Holds the phi nodes that are first-order recurrences. + RecurrenceSet FirstOrderRecurrences; + + /// Holds instructions that need to sink past other instructions to handle + /// first-order recurrences. + DenseMap SinkAfter; + + /// Holds the widest induction type encountered. + Type *WidestIndTy = nullptr; + + /// Allowed outside users. This holds the induction and reduction + /// vars which can be accessed from outside the loop. + SmallPtrSet AllowedExit; + + /// Can we assume the absence of NaNs. + bool HasFunNoNaNAttr = false; + + /// Vectorization requirements that will go through late-evaluation. + LoopVectorizationRequirements *Requirements; + + /// Used to emit an analysis of any legality issues. + LoopVectorizeHints *Hints; + + /// The demanded bits analsyis is used to compute the minimum type size in + /// which a reduction can be computed. + DemandedBits *DB; + + /// The assumption cache analysis is used to compute the minimum type size in + /// which a reduction can be computed. + AssumptionCache *AC; + + /// While vectorizing these instructions we have to generate a + /// call to the appropriate masked intrinsic + SmallPtrSet MaskedOp; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_LOOPVECTORIZATIONLEGALITY_H Index: include/llvm/Analysis/Utils/LoopUtils.h =================================================================== --- include/llvm/Analysis/Utils/LoopUtils.h +++ include/llvm/Analysis/Utils/LoopUtils.h @@ -0,0 +1,344 @@ +//===- llvm/Analysis/Utils/LoopUtils.h - Loop utilities ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines some loop transformation utilities. ReccurenceDescriptor +// and InductionDescriptor classes are currently housed here. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_UTILS_LOOPUTILS_H +#define LLVM_ANALYSIS_UTILS_LOOPUTILS_H + +#include "llvm/IR/IRBuilder.h" + +namespace llvm { + +class AssumptionCache; +class DataLayout; +class DemandedBits; +class DominatorTree; +class Loop; +class PredicatedScalarEvolution; +class ScalarEvolution; +class SCEV; + +/// The RecurrenceDescriptor is used to identify recurrences variables in a +/// loop. Reduction is a special case of recurrence that has uses of the +/// recurrence variable outside the loop. The method isReductionPHI identifies +/// reductions that are basic recurrences. +/// +/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min, +/// or max of a set of terms. For example: for(i=0; i &CI) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), + UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { + CastInsts.insert(CI.begin(), CI.end()); + } + + /// This POD struct holds information about a potential recurrence operation. + class InstDesc { + public: + InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), + UnsafeAlgebraInst(UAI) {} + + InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) + : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), + UnsafeAlgebraInst(UAI) {} + + bool isRecurrence() { return IsRecurrence; } + + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + + MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } + + Instruction *getPatternInst() { return PatternLastInst; } + + private: + // Is this instruction a recurrence candidate. + bool IsRecurrence; + // The last instruction in a min/max pattern (select of the select(icmp()) + // pattern), or the current recurrence instruction otherwise. + Instruction *PatternLastInst; + // If this is a min/max pattern the comparison predicate. + MinMaxRecurrenceKind MinMaxKind; + // Recurrence has unsafe algebra. + Instruction *UnsafeAlgebraInst; + }; + + /// Returns a struct describing if the instruction 'I' can be a recurrence + /// variable of type 'Kind'. If the recurrence is a min/max pattern of + /// select(icmp()) this function advances the instruction pointer 'I' from the + /// compare instruction to the select instruction and stores this pointer in + /// 'PatternLastInst' member of the returned struct. + static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, + InstDesc &Prev, bool HasFunNoNaNAttr); + + /// Returns true if instruction I has multiple uses in Insts + static bool hasMultipleUsesOf(Instruction *I, + SmallPtrSetImpl &Insts); + + /// Returns true if all uses of the instruction I is within the Set. + static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set); + + /// Returns a struct describing if the instruction if the instruction is a + /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) + /// or max(X, Y). + static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev); + + /// Returns identity corresponding to the RecurrenceKind. + static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp); + + /// Returns the opcode of binary operation corresponding to the + /// RecurrenceKind. + static unsigned getRecurrenceBinOp(RecurrenceKind Kind); + + /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. + static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK, + Value *Left, Value *Right); + + /// Returns true if Phi is a reduction of type Kind and adds it to the + /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are + /// non-null, the minimal bit width needed to compute the reduction will be + /// computed. + static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, + bool HasFunNoNaNAttr, + RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, + AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr); + + /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor + /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are + /// non-null, the minimal bit width needed to compute the reduction will be + /// computed. + static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, + RecurrenceDescriptor &RedDes, + DemandedBits *DB = nullptr, + AssumptionCache *AC = nullptr, + DominatorTree *DT = nullptr); + + /// Returns true if Phi is a first-order recurrence. A first-order recurrence + /// is a non-reduction recurrence relation in which the value of the + /// recurrence in the current loop iteration equals a value defined in the + /// previous iteration. \p SinkAfter includes pairs of instructions where the + /// first will be rescheduled to appear after the second if/when the loop is + /// vectorized. It may be augmented with additional pairs if needed in order + /// to handle Phi as a first-order recurrence. + static bool + isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, + DenseMap &SinkAfter, + DominatorTree *DT); + + RecurrenceKind getRecurrenceKind() { return Kind; } + + MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; } + + TrackingVH getRecurrenceStartValue() { return StartValue; } + + Instruction *getLoopExitInstr() { return LoopExitInstr; } + + /// Returns true if the recurrence has unsafe algebra which requires a relaxed + /// floating-point model. + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + /// Returns first unsafe algebra instruction in the PHI node's use-chain. + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + + /// Returns true if the recurrence kind is an integer kind. + static bool isIntegerRecurrenceKind(RecurrenceKind Kind); + + /// Returns true if the recurrence kind is a floating point kind. + static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); + + /// Returns true if the recurrence kind is an arithmetic kind. + static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); + + /// Returns the type of the recurrence. This type can be narrower than the + /// actual type of the Phi if the recurrence has been type-promoted. + Type *getRecurrenceType() { return RecurrenceType; } + + /// Returns a reference to the instructions used for type-promoting the + /// recurrence. + SmallPtrSet &getCastInsts() { return CastInsts; } + + /// Returns true if all source operands of the recurrence are SExtInsts. + bool isSigned() { return IsSigned; } + +private: + // The starting value of the recurrence. + // It does not have to be zero! + TrackingVH StartValue; + // The instruction who's value is used outside the loop. + Instruction *LoopExitInstr = nullptr; + // The kind of the recurrence. + RecurrenceKind Kind = RK_NoRecurrence; + // If this a min/max recurrence the kind of recurrence. + MinMaxRecurrenceKind MinMaxKind = MRK_Invalid; + // First occurrence of unasfe algebra in the PHI's use-chain. + Instruction *UnsafeAlgebraInst = nullptr; + // The type of the recurrence. + Type *RecurrenceType = nullptr; + // True if all source operands of the recurrence are SExtInsts. + bool IsSigned = false; + // Instructions used for type-promoting the recurrence. + SmallPtrSet CastInsts; +}; + +/// A struct for saving information about induction variables. +class InductionDescriptor { +public: + /// This enum represents the kinds of inductions that we support. + enum InductionKind { + IK_NoInduction, ///< Not an induction variable. + IK_IntInduction, ///< Integer induction variable. Step = C. + IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem). + IK_FpInduction ///< Floating point induction variable. + }; + +public: + /// Default constructor - creates an invalid induction. + InductionDescriptor() = default; + + /// Get the consecutive direction. Returns: + /// 0 - unknown or non-consecutive. + /// 1 - consecutive and increasing. + /// -1 - consecutive and decreasing. + int getConsecutiveDirection() const; + + /// Compute the transformed value of Index at offset StartValue using step + /// StepValue. + /// For integer induction, returns StartValue + Index * StepValue. + /// For pointer induction, returns StartValue[Index * StepValue]. + /// FIXME: The newly created binary instructions should contain nsw/nuw + /// flags, which can be found from the original scalar operations. + Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, + const DataLayout& DL) const; + + Value *getStartValue() const { return StartValue; } + InductionKind getKind() const { return IK; } + const SCEV *getStep() const { return Step; } + ConstantInt *getConstIntStepValue() const; + + /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an + /// induction, the induction descriptor \p D will contain the data describing + /// this induction. If by some other means the caller has a better SCEV + /// expression for \p Phi than the one returned by the ScalarEvolution + /// analysis, it can be passed through \p Expr. If the def-use chain + /// associated with the phi includes casts (that we know we can ignore + /// under proper runtime checks), they are passed through \p CastsToIgnore. + static bool + isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, + InductionDescriptor &D, const SCEV *Expr = nullptr, + SmallVectorImpl *CastsToIgnore = nullptr); + + /// Returns true if \p Phi is a floating point induction in the loop \p L. + /// If \p Phi is an induction, the induction descriptor \p D will contain + /// the data describing this induction. + static bool isFPInductionPHI(PHINode *Phi, const Loop* L, + ScalarEvolution *SE, InductionDescriptor &D); + + /// Returns true if \p Phi is a loop \p L induction, in the context associated + /// with the run-time predicate of PSE. If \p Assume is true, this can add + /// further SCEV predicates to \p PSE in order to prove that \p Phi is an + /// induction. + /// If \p Phi is an induction, \p D will contain the data describing this + /// induction. + static bool isInductionPHI(PHINode *Phi, const Loop* L, + PredicatedScalarEvolution &PSE, + InductionDescriptor &D, bool Assume = false); + + /// Returns true if the induction type is FP and the binary operator does + /// not have the "fast-math" property. Such operation requires a relaxed FP + /// mode. + bool hasUnsafeAlgebra() { + return InductionBinOp && !cast(InductionBinOp)->isFast(); + } + + /// Returns induction operator that does not have "fast-math" property + /// and requires FP unsafe mode. + Instruction *getUnsafeAlgebraInst() { + if (!InductionBinOp || cast(InductionBinOp)->isFast()) + return nullptr; + return InductionBinOp; + } + + /// Returns binary opcode of the induction operator. + Instruction::BinaryOps getInductionOpcode() const { + return InductionBinOp ? InductionBinOp->getOpcode() : + Instruction::BinaryOpsEnd; + } + + /// Returns a reference to the type cast instructions in the induction + /// update chain, that are redundant when guarded with a runtime + /// SCEV overflow check. + const SmallVectorImpl &getCastInsts() const { + return RedundantCasts; + } + +private: + /// Private constructor - used by \c isInductionPHI. + InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, + BinaryOperator *InductionBinOp = nullptr, + SmallVectorImpl *Casts = nullptr); + + /// Start value. + TrackingVH StartValue; + /// Induction kind. + InductionKind IK = IK_NoInduction; + /// Step value. + const SCEV *Step = nullptr; + // Instruction that advances induction variable. + BinaryOperator *InductionBinOp = nullptr; + // Instructions used for type-casts of the induction variable, + // that are redundant when guarded with a runtime SCEV overflow check. + SmallVector RedundantCasts; +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_UTILS_LOOPUTILS_H Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -14,351 +14,20 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DemandedBits.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Utils/LoopUtils.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Casting.h" namespace llvm { -class AliasSet; class AliasSetTracker; -class BasicBlock; -class DataLayout; -class Loop; class LoopInfo; +class LoopSafetyInfo; class OptimizationRemarkEmitter; -class PredicatedScalarEvolution; class PredIteratorCache; -class ScalarEvolution; -class SCEV; class TargetLibraryInfo; -class TargetTransformInfo; - - -/// The RecurrenceDescriptor is used to identify recurrences variables in a -/// loop. Reduction is a special case of recurrence that has uses of the -/// recurrence variable outside the loop. The method isReductionPHI identifies -/// reductions that are basic recurrences. -/// -/// Basic recurrences are defined as the summation, product, OR, AND, XOR, min, -/// or max of a set of terms. For example: for(i=0; i &CI) - : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), - UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { - CastInsts.insert(CI.begin(), CI.end()); - } - - /// This POD struct holds information about a potential recurrence operation. - class InstDesc { - public: - InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) - : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), - UnsafeAlgebraInst(UAI) {} - - InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) - : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), - UnsafeAlgebraInst(UAI) {} - - bool isRecurrence() { return IsRecurrence; } - - bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } - - Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } - - MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } - - Instruction *getPatternInst() { return PatternLastInst; } - - private: - // Is this instruction a recurrence candidate. - bool IsRecurrence; - // The last instruction in a min/max pattern (select of the select(icmp()) - // pattern), or the current recurrence instruction otherwise. - Instruction *PatternLastInst; - // If this is a min/max pattern the comparison predicate. - MinMaxRecurrenceKind MinMaxKind; - // Recurrence has unsafe algebra. - Instruction *UnsafeAlgebraInst; - }; - - /// Returns a struct describing if the instruction 'I' can be a recurrence - /// variable of type 'Kind'. If the recurrence is a min/max pattern of - /// select(icmp()) this function advances the instruction pointer 'I' from the - /// compare instruction to the select instruction and stores this pointer in - /// 'PatternLastInst' member of the returned struct. - static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, - InstDesc &Prev, bool HasFunNoNaNAttr); - - /// Returns true if instruction I has multiple uses in Insts - static bool hasMultipleUsesOf(Instruction *I, - SmallPtrSetImpl &Insts); - - /// Returns true if all uses of the instruction I is within the Set. - static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set); - - /// Returns a struct describing if the instruction if the instruction is a - /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) - /// or max(X, Y). - static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev); - - /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp); - - /// Returns the opcode of binary operation corresponding to the - /// RecurrenceKind. - static unsigned getRecurrenceBinOp(RecurrenceKind Kind); - - /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. - static Value *createMinMaxOp(IRBuilder<> &Builder, MinMaxRecurrenceKind RK, - Value *Left, Value *Right); - - /// Returns true if Phi is a reduction of type Kind and adds it to the - /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are - /// non-null, the minimal bit width needed to compute the reduction will be - /// computed. - static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, - bool HasFunNoNaNAttr, - RecurrenceDescriptor &RedDes, - DemandedBits *DB = nullptr, - AssumptionCache *AC = nullptr, - DominatorTree *DT = nullptr); - - /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor - /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are - /// non-null, the minimal bit width needed to compute the reduction will be - /// computed. - static bool isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes, - DemandedBits *DB = nullptr, - AssumptionCache *AC = nullptr, - DominatorTree *DT = nullptr); - - /// Returns true if Phi is a first-order recurrence. A first-order recurrence - /// is a non-reduction recurrence relation in which the value of the - /// recurrence in the current loop iteration equals a value defined in the - /// previous iteration. \p SinkAfter includes pairs of instructions where the - /// first will be rescheduled to appear after the second if/when the loop is - /// vectorized. It may be augmented with additional pairs if needed in order - /// to handle Phi as a first-order recurrence. - static bool - isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, - DenseMap &SinkAfter, - DominatorTree *DT); - - RecurrenceKind getRecurrenceKind() { return Kind; } - - MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; } - - TrackingVH getRecurrenceStartValue() { return StartValue; } - - Instruction *getLoopExitInstr() { return LoopExitInstr; } - - /// Returns true if the recurrence has unsafe algebra which requires a relaxed - /// floating-point model. - bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } - - /// Returns first unsafe algebra instruction in the PHI node's use-chain. - Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } - - /// Returns true if the recurrence kind is an integer kind. - static bool isIntegerRecurrenceKind(RecurrenceKind Kind); - - /// Returns true if the recurrence kind is a floating point kind. - static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); - - /// Returns true if the recurrence kind is an arithmetic kind. - static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); - - /// Returns the type of the recurrence. This type can be narrower than the - /// actual type of the Phi if the recurrence has been type-promoted. - Type *getRecurrenceType() { return RecurrenceType; } - - /// Returns a reference to the instructions used for type-promoting the - /// recurrence. - SmallPtrSet &getCastInsts() { return CastInsts; } - - /// Returns true if all source operands of the recurrence are SExtInsts. - bool isSigned() { return IsSigned; } - -private: - // The starting value of the recurrence. - // It does not have to be zero! - TrackingVH StartValue; - // The instruction who's value is used outside the loop. - Instruction *LoopExitInstr = nullptr; - // The kind of the recurrence. - RecurrenceKind Kind = RK_NoRecurrence; - // If this a min/max recurrence the kind of recurrence. - MinMaxRecurrenceKind MinMaxKind = MRK_Invalid; - // First occurrence of unasfe algebra in the PHI's use-chain. - Instruction *UnsafeAlgebraInst = nullptr; - // The type of the recurrence. - Type *RecurrenceType = nullptr; - // True if all source operands of the recurrence are SExtInsts. - bool IsSigned = false; - // Instructions used for type-promoting the recurrence. - SmallPtrSet CastInsts; -}; - -/// A struct for saving information about induction variables. -class InductionDescriptor { -public: - /// This enum represents the kinds of inductions that we support. - enum InductionKind { - IK_NoInduction, ///< Not an induction variable. - IK_IntInduction, ///< Integer induction variable. Step = C. - IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem). - IK_FpInduction ///< Floating point induction variable. - }; - -public: - /// Default constructor - creates an invalid induction. - InductionDescriptor() = default; - - /// Get the consecutive direction. Returns: - /// 0 - unknown or non-consecutive. - /// 1 - consecutive and increasing. - /// -1 - consecutive and decreasing. - int getConsecutiveDirection() const; - - /// Compute the transformed value of Index at offset StartValue using step - /// StepValue. - /// For integer induction, returns StartValue + Index * StepValue. - /// For pointer induction, returns StartValue[Index * StepValue]. - /// FIXME: The newly created binary instructions should contain nsw/nuw - /// flags, which can be found from the original scalar operations. - Value *transform(IRBuilder<> &B, Value *Index, ScalarEvolution *SE, - const DataLayout& DL) const; - - Value *getStartValue() const { return StartValue; } - InductionKind getKind() const { return IK; } - const SCEV *getStep() const { return Step; } - ConstantInt *getConstIntStepValue() const; - - /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an - /// induction, the induction descriptor \p D will contain the data describing - /// this induction. If by some other means the caller has a better SCEV - /// expression for \p Phi than the one returned by the ScalarEvolution - /// analysis, it can be passed through \p Expr. If the def-use chain - /// associated with the phi includes casts (that we know we can ignore - /// under proper runtime checks), they are passed through \p CastsToIgnore. - static bool - isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, - InductionDescriptor &D, const SCEV *Expr = nullptr, - SmallVectorImpl *CastsToIgnore = nullptr); - - /// Returns true if \p Phi is a floating point induction in the loop \p L. - /// If \p Phi is an induction, the induction descriptor \p D will contain - /// the data describing this induction. - static bool isFPInductionPHI(PHINode *Phi, const Loop* L, - ScalarEvolution *SE, InductionDescriptor &D); - - /// Returns true if \p Phi is a loop \p L induction, in the context associated - /// with the run-time predicate of PSE. If \p Assume is true, this can add - /// further SCEV predicates to \p PSE in order to prove that \p Phi is an - /// induction. - /// If \p Phi is an induction, \p D will contain the data describing this - /// induction. - static bool isInductionPHI(PHINode *Phi, const Loop* L, - PredicatedScalarEvolution &PSE, - InductionDescriptor &D, bool Assume = false); - - /// Returns true if the induction type is FP and the binary operator does - /// not have the "fast-math" property. Such operation requires a relaxed FP - /// mode. - bool hasUnsafeAlgebra() { - return InductionBinOp && !cast(InductionBinOp)->isFast(); - } - - /// Returns induction operator that does not have "fast-math" property - /// and requires FP unsafe mode. - Instruction *getUnsafeAlgebraInst() { - if (!InductionBinOp || cast(InductionBinOp)->isFast()) - return nullptr; - return InductionBinOp; - } - - /// Returns binary opcode of the induction operator. - Instruction::BinaryOps getInductionOpcode() const { - return InductionBinOp ? InductionBinOp->getOpcode() : - Instruction::BinaryOpsEnd; - } - - /// Returns a reference to the type cast instructions in the induction - /// update chain, that are redundant when guarded with a runtime - /// SCEV overflow check. - const SmallVectorImpl &getCastInsts() const { - return RedundantCasts; - } - -private: - /// Private constructor - used by \c isInductionPHI. - InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, - BinaryOperator *InductionBinOp = nullptr, - SmallVectorImpl *Casts = nullptr); - - /// Start value. - TrackingVH StartValue; - /// Induction kind. - InductionKind IK = IK_NoInduction; - /// Step value. - const SCEV *Step = nullptr; - // Instruction that advances induction variable. - BinaryOperator *InductionBinOp = nullptr; - // Instructions used for type-casts of the induction variable, - // that are redundant when guarded with a runtime SCEV overflow check. - SmallVector RedundantCasts; -}; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -46,9 +46,11 @@ Loads.cpp LoopAccessAnalysis.cpp LoopAnalysisManager.cpp - LoopUnrollAnalyzer.cpp LoopInfo.cpp LoopPass.cpp + LoopUnrollAnalyzer.cpp + LoopUtils.cpp + LoopVectorizationLegality.cpp MemDepPrinter.cpp MemDerefPrinter.cpp MemoryBuiltins.cpp Index: lib/Analysis/LoopUtils.cpp =================================================================== --- lib/Analysis/LoopUtils.cpp +++ lib/Analysis/LoopUtils.cpp @@ -0,0 +1,1126 @@ +//===-- LoopUtils.cpp - Loop Utility functions -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines common loop utility functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Utils/LoopUtils.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "loop-utils" + +bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, + SmallPtrSetImpl &Set) { + for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) + if (!Set.count(dyn_cast(*Use))) + return false; + return true; +} + +bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_IntegerOr: + case RK_IntegerAnd: + case RK_IntegerXor: + case RK_IntegerMinMax: + return true; + } + return false; +} + +bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { + return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); +} + +bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_FloatAdd: + case RK_FloatMult: + return true; + } + return false; +} + +/// Determines if Phi may have been type-promoted. If Phi has a single user +/// that ANDs the Phi with a type mask, return the user. RT is updated to +/// account for the narrower bit width represented by the mask, and the AND +/// instruction is added to CI. +static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { + if (!Phi->hasOneUse()) + return Phi; + + const APInt *M = nullptr; + Instruction *I, *J = cast(Phi->use_begin()->getUser()); + + // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT + // with a new integer type of the corresponding bit width. + if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { + int32_t Bits = (*M + 1).exactLogBase2(); + if (Bits > 0) { + RT = IntegerType::get(Phi->getContext(), Bits); + Visited.insert(Phi); + CI.insert(J); + return J; + } + } + return Phi; +} + +/// Compute the minimal bit width needed to represent a reduction whose exit +/// instruction is given by Exit. +static std::pair computeRecurrenceType(Instruction *Exit, + DemandedBits *DB, + AssumptionCache *AC, + DominatorTree *DT) { + bool IsSigned = false; + const DataLayout &DL = Exit->getModule()->getDataLayout(); + uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); + + if (DB) { + // Use the demanded bits analysis to determine the bits that are live out + // of the exit instruction, rounding up to the nearest power of two. If the + // use of demanded bits results in a smaller bit width, we know the value + // must be positive (i.e., IsSigned = false), because if this were not the + // case, the sign bit would have been demanded. + auto Mask = DB->getDemandedBits(Exit); + MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); + } + + if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { + // If demanded bits wasn't able to limit the bit width, we can try to use + // value tracking instead. This can be the case, for example, if the value + // may be negative. + auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); + auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); + MaxBitWidth = NumTypeBits - NumSignBits; + KnownBits Bits = computeKnownBits(Exit, DL); + if (!Bits.isNonNegative()) { + // If the value is not known to be non-negative, we set IsSigned to true, + // meaning that we will use sext instructions instead of zext + // instructions to restore the original type. + IsSigned = true; + if (!Bits.isNegative()) + // If the value is not known to be negative, we don't known what the + // upper bit is, and therefore, we don't know what kind of extend we + // will need. In this case, just increase the bit width by one bit and + // use sext. + ++MaxBitWidth; + } + } + if (!isPowerOf2_64(MaxBitWidth)) + MaxBitWidth = NextPowerOf2(MaxBitWidth); + + return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), + IsSigned); +} + +/// Collect cast instructions that can be ignored in the vectorizer's cost +/// model, given a reduction exit value and the minimal type in which the +/// reduction can be represented. +static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, + Type *RecurrenceType, + SmallPtrSetImpl &Casts) { + + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(Exit); + + while (!Worklist.empty()) { + Instruction *Val = Worklist.pop_back_val(); + Visited.insert(Val); + if (auto *Cast = dyn_cast(Val)) + if (Cast->getSrcTy() == RecurrenceType) { + // If the source type of a cast instruction is equal to the recurrence + // type, it will be eliminated, and should be ignored in the vectorizer + // cost model. + Casts.insert(Cast); + continue; + } + + // Add all operands to the work list if they are loop-varying values that + // we haven't yet visited. + for (Value *O : cast(Val)->operands()) + if (auto *I = dyn_cast(O)) + if (TheLoop->contains(I) && !Visited.count(I)) + Worklist.push_back(I); + } +} + +bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, + Loop *TheLoop, bool HasFunNoNaNAttr, + RecurrenceDescriptor &RedDes, + DemandedBits *DB, + AssumptionCache *AC, + DominatorTree *DT) { + if (Phi->getNumIncomingValues() != 2) + return false; + + // Reduction variables are only found in the loop header block. + if (Phi->getParent() != TheLoop->getHeader()) + return false; + + // Obtain the reduction start value from the value that comes from the loop + // preheader. + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); + + // ExitInstruction is the single value which is used outside the loop. + // We only allow for a single reduction value to be used outside the loop. + // This includes users of the reduction, variables (which form a cycle + // which ends in the phi node). + Instruction *ExitInstruction = nullptr; + // Indicates that we found a reduction operation in our scan. + bool FoundReduxOp = false; + + // We start with the PHI node and scan for all of the users of this + // instruction. All users must be instructions that can be used as reduction + // variables (such as ADD). We must have a single out-of-block user. The cycle + // must include the original PHI. + bool FoundStartPHI = false; + + // To recognize min/max patterns formed by a icmp select sequence, we store + // the number of instruction we saw from the recognized min/max pattern, + // to make sure we only see exactly the two instructions. + unsigned NumCmpSelectPatternInst = 0; + InstDesc ReduxDesc(false, nullptr); + + // Data used for determining if the recurrence has been type-promoted. + Type *RecurrenceType = Phi->getType(); + SmallPtrSet CastInsts; + Instruction *Start = Phi; + bool IsSigned = false; + + SmallPtrSet VisitedInsts; + SmallVector Worklist; + + // Return early if the recurrence kind does not match the type of Phi. If the + // recurrence kind is arithmetic, we attempt to look through AND operations + // resulting from the type promotion performed by InstCombine. Vector + // operations are not limited to the legal integer widths, so we may be able + // to evaluate the reduction in the narrower width. + if (RecurrenceType->isFloatingPointTy()) { + if (!isFloatingPointRecurrenceKind(Kind)) + return false; + } else { + if (!isIntegerRecurrenceKind(Kind)) + return false; + if (isArithmeticRecurrenceKind(Kind)) + Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } + + Worklist.push_back(Start); + VisitedInsts.insert(Start); + + // A value in the reduction can be used: + // - By the reduction: + // - Reduction operation: + // - One use of reduction value (safe). + // - Multiple use of reduction value (not safe). + // - PHI: + // - All uses of the PHI must be the reduction (safe). + // - Otherwise, not safe. + // - By instructions outside of the loop (safe). + // * One value may have several outside users, but all outside + // uses must be of the same value. + // - By an instruction that is not part of the reduction (not safe). + // This is either: + // * An instruction type other than PHI or the reduction operation. + // * A PHI in the header other than the initial PHI. + while (!Worklist.empty()) { + Instruction *Cur = Worklist.back(); + Worklist.pop_back(); + + // No Users. + // If the instruction has no users then this is a broken chain and can't be + // a reduction variable. + if (Cur->use_empty()) + return false; + + bool IsAPhi = isa(Cur); + + // A header PHI use other than the original PHI. + if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) + return false; + + // Reductions of instructions such as Div, and Sub is only possible if the + // LHS is the reduction variable. + if (!Cur->isCommutative() && !IsAPhi && !isa(Cur) && + !isa(Cur) && !isa(Cur) && + !VisitedInsts.count(dyn_cast(Cur->getOperand(0)))) + return false; + + // Any reduction instruction must be of one of the allowed kinds. We ignore + // the starting value (the Phi or an AND instruction if the Phi has been + // type-promoted). + if (Cur != Start) { + ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); + if (!ReduxDesc.isRecurrence()) + return false; + } + + // A reduction operation must only have one use of the reduction value. + if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && + hasMultipleUsesOf(Cur, VisitedInsts)) + return false; + + // All inputs to a PHI node must be a reduction value. + if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) + return false; + + if (Kind == RK_IntegerMinMax && + (isa(Cur) || isa(Cur))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa(Cur) || isa(Cur))) + ++NumCmpSelectPatternInst; + + // Check whether we found a reduction operator. + FoundReduxOp |= !IsAPhi && Cur != Start; + + // Process users of current instruction. Push non-PHI nodes after PHI nodes + // onto the stack. This way we are going to have seen all inputs to PHI + // nodes once we get to them. + SmallVector NonPHIs; + SmallVector PHIs; + for (User *U : Cur->users()) { + Instruction *UI = cast(U); + + // Check if we found the exit user. + BasicBlock *Parent = UI->getParent(); + if (!TheLoop->contains(Parent)) { + // If we already know this instruction is used externally, move on to + // the next user. + if (ExitInstruction == Cur) + continue; + + // Exit if you find multiple values used outside or if the header phi + // node is being used. In this case the user uses the value of the + // previous iteration, in which case we would loose "VF-1" iterations of + // the reduction operation if we vectorize. + if (ExitInstruction != nullptr || Cur == Phi) + return false; + + // The instruction used by an outside user must be the last instruction + // before we feed back to the reduction phi. Otherwise, we loose VF-1 + // operations on the value. + if (!is_contained(Phi->operands(), Cur)) + return false; + + ExitInstruction = Cur; + continue; + } + + // Process instructions only once (termination). Each reduction cycle + // value must only be used once, except by phi nodes and min/max + // reductions which are represented as a cmp followed by a select. + InstDesc IgnoredVal(false, nullptr); + if (VisitedInsts.insert(UI).second) { + if (isa(UI)) + PHIs.push_back(UI); + else + NonPHIs.push_back(UI); + } else if (!isa(UI) && + ((!isa(UI) && !isa(UI) && + !isa(UI)) || + !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) + return false; + + // Remember that we completed the cycle. + if (UI == Phi) + FoundStartPHI = true; + } + Worklist.append(PHIs.begin(), PHIs.end()); + Worklist.append(NonPHIs.begin(), NonPHIs.end()); + } + + // This means we have seen one but not the other instruction of the + // pattern or more than just a select and cmp. + if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) + return false; + + if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) + return false; + + if (Start != Phi) { + // If the starting value is not the same as the phi node, we speculatively + // looked through an 'and' instruction when evaluating a potential + // arithmetic reduction to determine if it may have been type-promoted. + // + // We now compute the minimal bit width that is required to represent the + // reduction. If this is the same width that was indicated by the 'and', we + // can represent the reduction in the smaller type. The 'and' instruction + // will be eliminated since it will essentially be a cast instruction that + // can be ignore in the cost model. If we compute a different type than we + // did when evaluating the 'and', the 'and' will not be eliminated, and we + // will end up with different kinds of operations in the recurrence + // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is + // the case. + // + // The vectorizer relies on InstCombine to perform the actual + // type-shrinking. It does this by inserting instructions to truncate the + // exit value of the reduction to the width indicated by RecurrenceType and + // then extend this value back to the original width. If IsSigned is false, + // a 'zext' instruction will be generated; otherwise, a 'sext' will be + // used. + // + // TODO: We should not rely on InstCombine to rewrite the reduction in the + // smaller type. We should just generate a correctly typed expression + // to begin with. + Type *ComputedType; + std::tie(ComputedType, IsSigned) = + computeRecurrenceType(ExitInstruction, DB, AC, DT); + if (ComputedType != RecurrenceType) + return false; + + // The recurrence expression will be represented in a narrower type. If + // there are any cast instructions that will be unnecessary, collect them + // in CastInsts. Note that the 'and' instruction was already included in + // this list. + // + // TODO: A better way to represent this may be to tag in some way all the + // instructions that are a part of the reduction. The vectorizer cost + // model could then apply the recurrence type to these instructions, + // without needing a white list of instructions to ignore. + collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); + } + + // We found a reduction var if we have reached the original phi node and we + // only have a single instruction with out-of-loop users. + + // The ExitInstruction(Instruction which is allowed to have out-of-loop users) + // is saved as part of the RecurrenceDescriptor. + + // Save the description of this reduction variable. + RecurrenceDescriptor RD( + RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), + ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); + RedDes = RD; + + return true; +} + +/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction +/// pattern corresponding to a min(X, Y) or max(X, Y). +RecurrenceDescriptor::InstDesc +RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { + + assert((isa(I) || isa(I) || isa(I)) && + "Expect a select instruction"); + Instruction *Cmp = nullptr; + SelectInst *Select = nullptr; + + // We must handle the select(cmp()) as a single instruction. Advance to the + // select. + if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) { + if (!Cmp->hasOneUse() || !(Select = dyn_cast(*I->user_begin()))) + return InstDesc(false, I); + return InstDesc(Select, Prev.getMinMaxKind()); + } + + // Only handle single use cases for now. + if (!(Select = dyn_cast(I))) + return InstDesc(false, I); + if (!(Cmp = dyn_cast(I->getOperand(0))) && + !(Cmp = dyn_cast(I->getOperand(0)))) + return InstDesc(false, I); + if (!Cmp->hasOneUse()) + return InstDesc(false, I); + + Value *CmpLeft; + Value *CmpRight; + + // Look for a min/max pattern. + if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_UIntMin); + else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_UIntMax); + else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_SIntMax); + else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return InstDesc(Select, MRK_FloatMax); + + return InstDesc(false, I); +} + +RecurrenceDescriptor::InstDesc +RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, + InstDesc &Prev, bool HasFunNoNaNAttr) { + bool FP = I->getType()->isFloatingPointTy(); + Instruction *UAI = Prev.getUnsafeAlgebraInst(); + if (!UAI && FP && !I->isFast()) + UAI = I; // Found an unsafe (unvectorizable) algebra instruction. + + switch (I->getOpcode()) { + default: + return InstDesc(false, I); + case Instruction::PHI: + return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); + case Instruction::Sub: + case Instruction::Add: + return InstDesc(Kind == RK_IntegerAdd, I); + case Instruction::Mul: + return InstDesc(Kind == RK_IntegerMult, I); + case Instruction::And: + return InstDesc(Kind == RK_IntegerAnd, I); + case Instruction::Or: + return InstDesc(Kind == RK_IntegerOr, I); + case Instruction::Xor: + return InstDesc(Kind == RK_IntegerXor, I); + case Instruction::FMul: + return InstDesc(Kind == RK_FloatMult, I, UAI); + case Instruction::FSub: + case Instruction::FAdd: + return InstDesc(Kind == RK_FloatAdd, I, UAI); + case Instruction::FCmp: + case Instruction::ICmp: + case Instruction::Select: + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) + return InstDesc(false, I); + return isMinMaxSelectCmpPattern(I, Prev); + } +} + +bool RecurrenceDescriptor::hasMultipleUsesOf( + Instruction *I, SmallPtrSetImpl &Insts) { + unsigned NumUses = 0; + for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; + ++Use) { + if (Insts.count(dyn_cast(*Use))) + ++NumUses; + if (NumUses > 1) + return true; + } + + return false; +} +bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, + RecurrenceDescriptor &RedDes, + DemandedBits *DB, AssumptionCache *AC, + DominatorTree *DT) { + + BasicBlock *Header = TheLoop->getHeader(); + Function &F = *Header->getParent(); + bool HasFunNoNaNAttr = + F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; + + if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, + DB, AC, DT)) { + DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, + AC, DT)) { + DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); + return true; + } + // Not a reduction of known type. + return false; +} + +bool RecurrenceDescriptor::isFirstOrderRecurrence( + PHINode *Phi, Loop *TheLoop, + DenseMap &SinkAfter, DominatorTree *DT) { + + // Ensure the phi node is in the loop header and has two incoming values. + if (Phi->getParent() != TheLoop->getHeader() || + Phi->getNumIncomingValues() != 2) + return false; + + // Ensure the loop has a preheader and a single latch block. The loop + // vectorizer will need the latch to set up the next iteration of the loop. + auto *Preheader = TheLoop->getLoopPreheader(); + auto *Latch = TheLoop->getLoopLatch(); + if (!Preheader || !Latch) + return false; + + // Ensure the phi node's incoming blocks are the loop preheader and latch. + if (Phi->getBasicBlockIndex(Preheader) < 0 || + Phi->getBasicBlockIndex(Latch) < 0) + return false; + + // Get the previous value. The previous value comes from the latch edge while + // the initial value comes form the preheader edge. + auto *Previous = dyn_cast(Phi->getIncomingValueForBlock(Latch)); + if (!Previous || !TheLoop->contains(Previous) || isa(Previous) || + SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. + return false; + + // Ensure every user of the phi node is dominated by the previous value. + // The dominance requirement ensures the loop vectorizer will not need to + // vectorize the initial value prior to the first iteration of the loop. + // TODO: Consider extending this sinking to handle other kinds of instructions + // and expressions, beyond sinking a single cast past Previous. + if (Phi->hasOneUse()) { + auto *I = Phi->user_back(); + if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && + DT->dominates(Previous, I->user_back())) { + if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. + SinkAfter[I] = Previous; + return true; + } + } + + for (User *U : Phi->users()) + if (auto *I = dyn_cast(U)) { + if (!DT->dominates(Previous, I)) + return false; + } + + return true; +} + +/// This function returns the identity element (or neutral element) for +/// the operation K. +Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, + Type *Tp) { + switch (K) { + case RK_IntegerXor: + case RK_IntegerAdd: + case RK_IntegerOr: + // Adding, Xoring, Oring zero to a number does not change it. + return ConstantInt::get(Tp, 0); + case RK_IntegerMult: + // Multiplying a number by 1 does not change it. + return ConstantInt::get(Tp, 1); + case RK_IntegerAnd: + // AND-ing a number with an all-1 value does not change it. + return ConstantInt::get(Tp, -1, true); + case RK_FloatMult: + // Multiplying a number by 1 does not change it. + return ConstantFP::get(Tp, 1.0L); + case RK_FloatAdd: + // Adding zero to a number does not change it. + return ConstantFP::get(Tp, 0.0L); + default: + llvm_unreachable("Unknown recurrence kind"); + } +} + +/// This function translates the recurrence kind to an LLVM binary operator. +unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { + switch (Kind) { + case RK_IntegerAdd: + return Instruction::Add; + case RK_IntegerMult: + return Instruction::Mul; + case RK_IntegerOr: + return Instruction::Or; + case RK_IntegerAnd: + return Instruction::And; + case RK_IntegerXor: + return Instruction::Xor; + case RK_FloatMult: + return Instruction::FMul; + case RK_FloatAdd: + return Instruction::FAdd; + case RK_IntegerMinMax: + return Instruction::ICmp; + case RK_FloatMinMax: + return Instruction::FCmp; + default: + llvm_unreachable("Unknown recurrence operation"); + } +} + +Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, + MinMaxRecurrenceKind RK, + Value *Left, Value *Right) { + CmpInst::Predicate P = CmpInst::ICMP_NE; + switch (RK) { + default: + llvm_unreachable("Unknown min/max recurrence kind"); + case MRK_UIntMin: + P = CmpInst::ICMP_ULT; + break; + case MRK_UIntMax: + P = CmpInst::ICMP_UGT; + break; + case MRK_SIntMin: + P = CmpInst::ICMP_SLT; + break; + case MRK_SIntMax: + P = CmpInst::ICMP_SGT; + break; + case MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; + } + + // We only match FP sequences that are 'fast', so we can unconditionally + // set it on any generated instructions. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF; + FMF.setFast(); + Builder.setFastMathFlags(FMF); + + Value *Cmp; + if (RK == MRK_FloatMin || RK == MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + +InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, + const SCEV *Step, BinaryOperator *BOp, + SmallVectorImpl *Casts) + : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { + assert(IK != IK_NoInduction && "Not an induction"); + + // Start value type should match the induction kind and the value + // itself should not be null. + assert(StartValue && "StartValue is null"); + assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer for pointer induction"); + assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && + "StartValue is not an integer for integer induction"); + + // Check the Step Value. It should be non-zero integer value. + assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && + "Step value is zero"); + + assert((IK != IK_PtrInduction || getConstIntStepValue()) && + "Step value should be constant for pointer induction"); + assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && + "StepValue is not an integer"); + + assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && + "StepValue is not FP for FpInduction"); + assert((IK != IK_FpInduction || (InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub))) && + "Binary opcode should be specified for FP induction"); + + if (Casts) { + for (auto &Inst : *Casts) { + RedundantCasts.push_back(Inst); + } + } +} + +int InductionDescriptor::getConsecutiveDirection() const { + ConstantInt *ConstStep = getConstIntStepValue(); + if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) + return ConstStep->getSExtValue(); + return 0; +} + +ConstantInt *InductionDescriptor::getConstIntStepValue() const { + if (isa(Step)) + return dyn_cast(cast(Step)->getValue()); + return nullptr; +} + +Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, + ScalarEvolution *SE, + const DataLayout& DL) const { + + SCEVExpander Exp(*SE, DL, "induction"); + assert(Index->getType() == Step->getType() && + "Index type does not match StepValue type"); + switch (IK) { + case IK_IntInduction: { + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + + // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution + // and calculate (Start + Index * Step) for all cases, without + // special handling for "isOne" and "isMinusOne". + // But in the real life the result code getting worse. We mix SCEV + // expressions and ADD/SUB operations and receive redundant + // intermediate values being calculated in different ways and + // Instcombine is unable to reduce them all. + + if (getConstIntStepValue() && + getConstIntStepValue()->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (getConstIntStepValue() && + getConstIntStepValue()->isOne()) + return B.CreateAdd(StartValue, Index); + const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), + SE->getMulExpr(Step, SE->getSCEV(Index))); + return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); + } + case IK_PtrInduction: { + assert(isa(Step) && + "Expected constant step for pointer induction"); + const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); + Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); + return B.CreateGEP(nullptr, StartValue, Index); + } + case IK_FpInduction: { + assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); + assert(InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub) && + "Original bin op should be defined for FP induction"); + + Value *StepValue = cast(Step)->getValue(); + + // Floating point operations had to be 'fast' to enable the induction. + FastMathFlags Flags; + Flags.setFast(); + + Value *MulExp = B.CreateFMul(StepValue, Index); + if (isa(MulExp)) + // We have to check, the MulExp may be a constant. + cast(MulExp)->setFastMathFlags(Flags); + + Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, + MulExp, "induction"); + if (isa(BOp)) + cast(BOp)->setFastMathFlags(Flags); + + return BOp; + } + case IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + +bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, + ScalarEvolution *SE, + InductionDescriptor &D) { + + // Here we only handle FP induction variables. + assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); + + if (TheLoop->getHeader() != Phi->getParent()) + return false; + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi if it has a unique entry value and a unique backedge value. + if (Phi->getNumIncomingValues() != 2) + return false; + Value *BEValue = nullptr, *StartValue = nullptr; + if (TheLoop->contains(Phi->getIncomingBlock(0))) { + BEValue = Phi->getIncomingValue(0); + StartValue = Phi->getIncomingValue(1); + } else { + assert(TheLoop->contains(Phi->getIncomingBlock(1)) && + "Unexpected Phi node in the loop"); + BEValue = Phi->getIncomingValue(1); + StartValue = Phi->getIncomingValue(0); + } + + BinaryOperator *BOp = dyn_cast(BEValue); + if (!BOp) + return false; + + Value *Addend = nullptr; + if (BOp->getOpcode() == Instruction::FAdd) { + if (BOp->getOperand(0) == Phi) + Addend = BOp->getOperand(1); + else if (BOp->getOperand(1) == Phi) + Addend = BOp->getOperand(0); + } else if (BOp->getOpcode() == Instruction::FSub) + if (BOp->getOperand(0) == Phi) + Addend = BOp->getOperand(1); + + if (!Addend) + return false; + + // The addend should be loop invariant + if (auto *I = dyn_cast(Addend)) + if (TheLoop->contains(I)) + return false; + + // FP Step has unknown SCEV + const SCEV *Step = SE->getUnknown(Addend); + D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); + return true; +} + +/// This function is called when we suspect that the update-chain of a phi node +/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, +/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime +/// predicate P under which the SCEV expression for the phi can be the +/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the +/// cast instructions that are involved in the update-chain of this induction. +/// A caller that adds the required runtime predicate can be free to drop these +/// cast instructions, and compute the phi using \p AR (instead of some scev +/// expression with casts). +/// +/// For example, without a predicate the scev expression can take the following +/// form: +/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) +/// +/// It corresponds to the following IR sequence: +/// %for.body: +/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] +/// %casted_phi = "ExtTrunc i64 %x" +/// %add = add i64 %casted_phi, %step +/// +/// where %x is given in \p PN, +/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, +/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of +/// several forms, for example, such as: +/// ExtTrunc1: %casted_phi = and %x, 2^n-1 +/// or: +/// ExtTrunc2: %t = shl %x, m +/// %casted_phi = ashr %t, m +/// +/// If we are able to find such sequence, we return the instructions +/// we found, namely %casted_phi and the instructions on its use-def chain up +/// to the phi (not including the phi). +static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, + const SCEVUnknown *PhiScev, + const SCEVAddRecExpr *AR, + SmallVectorImpl &CastInsts) { + + assert(CastInsts.empty() && "CastInsts is expected to be empty."); + auto *PN = cast(PhiScev->getValue()); + assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); + const Loop *L = AR->getLoop(); + + // Find any cast instructions that participate in the def-use chain of + // PhiScev in the loop. + // FORNOW/TODO: We currently expect the def-use chain to include only + // two-operand instructions, where one of the operands is an invariant. + // createAddRecFromPHIWithCasts() currently does not support anything more + // involved than that, so we keep the search simple. This can be + // extended/generalized as needed. + + auto getDef = [&](const Value *Val) -> Value * { + const BinaryOperator *BinOp = dyn_cast(Val); + if (!BinOp) + return nullptr; + Value *Op0 = BinOp->getOperand(0); + Value *Op1 = BinOp->getOperand(1); + Value *Def = nullptr; + if (L->isLoopInvariant(Op0)) + Def = Op1; + else if (L->isLoopInvariant(Op1)) + Def = Op0; + return Def; + }; + + // Look for the instruction that defines the induction via the + // loop backedge. + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + Value *Val = PN->getIncomingValueForBlock(Latch); + if (!Val) + return false; + + // Follow the def-use chain until the induction phi is reached. + // If on the way we encounter a Value that has the same SCEV Expr as the + // phi node, we can consider the instructions we visit from that point + // as part of the cast-sequence that can be ignored. + bool InCastSequence = false; + auto *Inst = dyn_cast(Val); + while (Val != PN) { + // If we encountered a phi node other than PN, or if we left the loop, + // we bail out. + if (!Inst || !L->contains(Inst)) { + return false; + } + auto *AddRec = dyn_cast(PSE.getSCEV(Val)); + if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) + InCastSequence = true; + if (InCastSequence) { + // Only the last instruction in the cast sequence is expected to have + // uses outside the induction def-use chain. + if (!CastInsts.empty()) + if (!Inst->hasOneUse()) + return false; + CastInsts.push_back(Inst); + } + Val = getDef(Val); + if (!Val) + return false; + Inst = dyn_cast(Val); + } + + return InCastSequence; +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, + PredicatedScalarEvolution &PSE, + InductionDescriptor &D, + bool Assume) { + Type *PhiTy = Phi->getType(); + + // Handle integer and pointer inductions variables. + // Now we handle also FP induction but not trying to make a + // recurrent expression from the PHI node in-place. + + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && + !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) + return false; + + if (PhiTy->isFloatingPointTy()) + return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); + + const SCEV *PhiScev = PSE.getSCEV(Phi); + const auto *AR = dyn_cast(PhiScev); + + // We need this expression to be an AddRecExpr. + if (Assume && !AR) + AR = PSE.getAsAddRec(Phi); + + if (!AR) { + DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + return false; + } + + // Record any Cast instructions that participate in the induction update + const auto *SymbolicPhi = dyn_cast(PhiScev); + // If we started from an UnknownSCEV, and managed to build an addRecurrence + // only after enabling Assume with PSCEV, this means we may have encountered + // cast instructions that required adding a runtime check in order to + // guarantee the correctness of the AddRecurence respresentation of the + // induction. + if (PhiScev != AR && SymbolicPhi) { + SmallVector Casts; + if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); + } + + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); +} + +bool InductionDescriptor::isInductionPHI( + PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, + InductionDescriptor &D, const SCEV *Expr, + SmallVectorImpl *CastsToIgnore) { + Type *PhiTy = Phi->getType(); + // We only handle integer and pointer inductions variables. + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return false; + + // Check that the PHI is consecutive. + const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); + const SCEVAddRecExpr *AR = dyn_cast(PhiScev); + + if (!AR) { + DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); + return false; + } + + if (AR->getLoop() != TheLoop) { + // FIXME: We should treat this as a uniform. Unfortunately, we + // don't currently know how to handled uniform PHIs. + DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); + return false; + } + + Value *StartValue = + Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); + const SCEV *Step = AR->getStepRecurrence(*SE); + // Calculate the pointer stride and check if it is consecutive. + // The stride may be a constant or a loop invariant integer value. + const SCEVConstant *ConstStep = dyn_cast(Step); + if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) + return false; + + if (PhiTy->isIntegerTy()) { + D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, + CastsToIgnore); + return true; + } + + assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); + // Pointer induction should be a constant. + if (!ConstStep) + return false; + + ConstantInt *CV = ConstStep->getValue(); + Type *PointerElementType = PhiTy->getPointerElementType(); + // The pointer stride cannot be determined if the pointer element type is not + // sized. + if (!PointerElementType->isSized()) + return false; + + const DataLayout &DL = Phi->getModule()->getDataLayout(); + int64_t Size = static_cast(DL.getTypeAllocSize(PointerElementType)); + if (!Size) + return false; + + int64_t CVSize = CV->getSExtValue(); + if (CVSize % Size) + return false; + auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, + true /* signed */); + D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); + return true; +} Index: lib/Analysis/LoopVectorizationLegality.cpp =================================================================== --- lib/Analysis/LoopVectorizationLegality.cpp +++ lib/Analysis/LoopVectorizationLegality.cpp @@ -0,0 +1,786 @@ +//===- LoopVectorizationLegality.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides loop vectorization legality analysis. Original code +// resided in LoopVectorize.cpp for a long time. Moved to Analysis tree +// for modularity and reusability. +// +// At this point, it is implemented as a utility class, not as an analysis +// pass. It should be easy to create an analysis pass around it if there +// is a need. +// +#include "llvm/Analysis/LoopVectorizationLegality.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/IntrinsicInst.h" + +using namespace llvm; + +#define LV_NAME "loop-vectorize" +#define DEBUG_TYPE LV_NAME + +static cl::opt + EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, + cl::desc("Enable if-conversion during vectorization.")); + +static cl::opt PragmaVectorizeMemoryCheckThreshold( + "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum allowed number of runtime memory checks with a " + "vectorize(enable) pragma.")); + +static cl::opt VectorizeSCEVCheckThreshold( + "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed.")); + +static cl::opt PragmaVectorizeSCEVCheckThreshold( + "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed with a " + "vectorize(enable) pragma")); + +/// Maximum vectorization interleave count. +static const unsigned MaxInterleaveFactor = 16; + +namespace llvm { +/// Create an analysis remark that explains why vectorization failed +/// +/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p +/// RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents vectorization. Otherwise \p TheLoop is used for +/// the location of the remark. \return the remark object that can be +/// streamed to. +OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, + StringRef RemarkName, + Loop *TheLoop, + Instruction *I) { + Value *CodeRegion = TheLoop->getHeader(); + DebugLoc DL = TheLoop->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); + R << "loop not vectorized: "; + return R; +} +} // namespace llvm + +bool LoopVectorizeHints::Hint::validate(unsigned Val) { + switch (Kind) { + case HK_WIDTH: + return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; + case HK_UNROLL: + return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; + case HK_FORCE: + return (Val <= 1); + case HK_ISVECTORIZED: + return (Val == 0 || Val == 1); + } + return false; +} + +LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving, + OptimizationRemarkEmitter &ORE) + : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), + Interleave("interleave.count", DisableInterleaving, HK_UNROLL), + Force("vectorize.enable", FK_Undefined, HK_FORCE), + IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { + // Populate values with existing loop metadata. + getHintsFromMetadata(); + + // force-vector-interleave overrides DisableInterleaving. + if (VectorizerParams::isInterleaveForced()) + Interleave.Value = VectorizerParams::VectorizationInterleave; + + if (IsVectorized.Value != 1) + // If the vectorization width and interleaving count are both 1 then + // consider the loop to have been already vectorized because there's + // nothing more that we can do. + IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; + DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() + << "LV: Interleaving disabled by the pass manager\n"); +} + +bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L, + bool AlwaysVectorize) const { + if (getForce() == LoopVectorizeHints::FK_Disabled) { + DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); + emitRemarkWithHints(); + return false; + } + + if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { + DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); + emitRemarkWithHints(); + return false; + } + + if (getIsVectorized() == 1) { + DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); + // FIXME: Add interleave.disable metadata. This will allow + // vectorize.disable to be used without disabling the pass and errors + // to differentiate between disabled vectorization and a width of 1. + ORE.emit([&]() { + return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), + "AllDisabled", L->getStartLoc(), + L->getHeader()) + << "loop not vectorized: vectorization and interleaving are " + "explicitly disabled, or the loop has already been " + "vectorized"; + }); + return false; + } + + return true; +} + +void LoopVectorizeHints::emitRemarkWithHints() const { + using namespace ore; + + ORE.emit([&]() { + if (Force.Value == LoopVectorizeHints::FK_Disabled) + return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", + TheLoop->getStartLoc(), + TheLoop->getHeader()) + << "loop not vectorized: vectorization is explicitly disabled"; + else { + OptimizationRemarkMissed R(LV_NAME, "MissedDetails", + TheLoop->getStartLoc(), TheLoop->getHeader()); + R << "loop not vectorized"; + if (Force.Value == LoopVectorizeHints::FK_Enabled) { + R << " (Force=" << NV("Force", true); + if (Width.Value != 0) + R << ", Vector Width=" << NV("VectorWidth", Width.Value); + if (Interleave.Value != 0) + R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); + R << ")"; + } + return R; + } + }); +} + +const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { + if (getWidth() == 1) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Disabled) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) + return LV_NAME; + return OptimizationRemarkAnalysis::AlwaysPrint; +} + +void LoopVectorizeHints::getHintsFromMetadata() { + MDNode *LoopID = TheLoop->getLoopID(); + if (!LoopID) + return; + + // First operand should refer to the loop id itself. + assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + const MDString *S = nullptr; + SmallVector Args; + + // The expected hint is either a MDString or a MDNode with the first + // operand a MDString. + if (const MDNode *MD = dyn_cast(LoopID->getOperand(i))) { + if (!MD || MD->getNumOperands() == 0) + continue; + S = dyn_cast(MD->getOperand(0)); + for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) + Args.push_back(MD->getOperand(i)); + } else { + S = dyn_cast(LoopID->getOperand(i)); + assert(Args.size() == 0 && "too many arguments for MDString"); + } + + if (!S) + continue; + + // Check if the hint starts with the loop metadata prefix. + StringRef Name = S->getString(); + if (Args.size() == 1) + setHint(Name, Args[0]); + } +} + +void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { + if (!Name.startswith(Prefix())) + return; + Name = Name.substr(Prefix().size(), StringRef::npos); + + const ConstantInt *C = mdconst::dyn_extract(Arg); + if (!C) + return; + unsigned Val = C->getZExtValue(); + + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; + for (auto H : Hints) { + if (Name == H->Name) { + if (H->validate(Val)) + H->Value = Val; + else + DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); + break; + } + } +} + +MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name, + unsigned V) const { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = { + MDString::get(Context, Name), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node, + ArrayRef HintTypes) { + MDString *Name = dyn_cast(Node->getOperand(0)); + if (!Name) + return false; + + for (auto H : HintTypes) + if (Name->getString().endswith(H.Name)) + return true; + return false; +} + +void LoopVectorizeHints::writeHintsToMetadata(ArrayRef HintTypes) { + if (HintTypes.empty()) + return; + + // Reserve the first element to LoopID (see below). + SmallVector MDs(1); + // If the loop already has metadata, then ignore the existing operands. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + // If node in update list, ignore old value. + if (!matchesHintMetadataName(Node, HintTypes)) + MDs.push_back(Node); + } + } + + // Now, add the missing hints. + for (auto H : HintTypes) + MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); + + // Replace current metadata node with new one. + LLVMContext &Context = TheLoop->getHeader()->getContext(); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + + TheLoop->setLoopID(NewLoopID); +} + +bool LoopVectorizationRequirements::doesNotMeet( + Function *F, Loop *L, const LoopVectorizeHints &Hints) { + const char *PassName = Hints.vectorizeAnalysisPassName(); + bool Failed = false; + if (UnsafeAlgebraInst && !Hints.allowReordering()) { + ORE.emit([&]() { + return OptimizationRemarkAnalysisFPCommute( + PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), + UnsafeAlgebraInst->getParent()) + << "loop not vectorized: cannot prove it is safe to reorder " + "floating-point operations"; + }); + Failed = true; + } + + // Test if runtime memcheck thresholds are exceeded. + bool PragmaThresholdReached = + NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; + bool ThresholdReached = + NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; + if ((ThresholdReached && !Hints.allowReordering()) || + PragmaThresholdReached) { + ORE.emit([&]() { + return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", + L->getStartLoc(), + L->getHeader()) + << "loop not vectorized: cannot prove it is safe to reorder " + "memory operations"; + }); + DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + Failed = true; + } + + return Failed; +} + +/// \brief Check whether it is safe to if-convert this phi node. +/// +/// Phi nodes with constant expressions that can trap are not safe to if +/// convert. +static bool canIfConvertPHINodes(BasicBlock *BB) { + for (PHINode &Phi : BB->phis()) { + for (Value *V : Phi.incoming_values()) + if (auto *C = dyn_cast(V)) + if (C->canTrap()) + return false; + } + return true; +} + +static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { + if (Ty->isPointerTy()) + return DL.getIntPtrType(Ty); + + // It is possible that char's or short's overflow when we ask for the loop's + // trip count, work around this by changing the type size. + if (Ty->getScalarSizeInBits() < 32) + return Type::getInt32Ty(Ty->getContext()); + + return Ty; +} + +static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { + Ty0 = convertPointerToIntegerType(DL, Ty0); + Ty1 = convertPointerToIntegerType(DL, Ty1); + if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) + return Ty0; + return Ty1; +} + +/// \brief Check that the instruction has outside loop users and is not an +/// identified reduction variable. +static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, + SmallPtrSetImpl &AllowedExit) { + // Reduction and Induction instructions are allowed to have exit users. All + // other instructions must not have external users. + if (!AllowedExit.count(Inst)) + // Check that all of the users of the loop are inside the BB. + for (User *U : Inst->users()) { + Instruction *UI = cast(U); + // This user may be a reduction exit value. + if (!TheLoop->contains(UI)) { + DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); + return true; + } + } + return false; +} + +void LoopVectorizationLegality::addInductionPhi( + PHINode *Phi, const InductionDescriptor &ID, + SmallPtrSetImpl &AllowedExit) { + Inductions[Phi] = ID; + + // In case this induction also comes with casts that we know we can ignore + // in the vectorized loop body, record them here. All casts could be recorded + // here for ignoring, but suffices to record only the first (as it is the + // only one that may bw used outside the cast sequence). + const SmallVectorImpl &Casts = ID.getCastInsts(); + if (!Casts.empty()) + InductionCastsToIgnore.insert(*Casts.begin()); + + Type *PhiTy = Phi->getType(); + const DataLayout &DL = Phi->getModule()->getDataLayout(); + + // Get the widest type. + if (!PhiTy->isFloatingPointTy()) { + if (!WidestIndTy) + WidestIndTy = convertPointerToIntegerType(DL, PhiTy); + else + WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); + } + + // Int inductions are special because we only allow one IV. + if (ID.getKind() == InductionDescriptor::IK_IntInduction && + ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && + isa(ID.getStartValue()) && + cast(ID.getStartValue())->isNullValue()) { + + // Use the phi node with the widest type as induction. Use the last + // one if there are multiple (no good reason for doing this other + // than it is expedient). We've checked that it begins at zero and + // steps by one, so this is a canonical induction variable. + if (!PrimaryInduction || PhiTy == WidestIndTy) + PrimaryInduction = Phi; + } + + // Both the PHI node itself, and the "post-increment" value feeding + // back into the PHI node may have external users. + // We can allow those uses, except if the SCEVs we have for them rely + // on predicates that only hold within the loop, since allowing the exit + // currently means re-using this SCEV outside the loop. + if (PSE.getUnionPredicate().isAlwaysTrue()) { + AllowedExit.insert(Phi); + AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + } + + DEBUG(dbgs() << "LV: Found an induction variable.\n"); +} + +bool LoopVectorizationLegality::canVectorizeInstrs() { + BasicBlock *Header = TheLoop->getHeader(); + + // Look for the attribute signaling the absence of NaNs. + Function &F = *Header->getParent(); + HasFunNoNaNAttr = + F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; + + // For each block in the loop. + for (BasicBlock *BB : TheLoop->blocks()) { + // Scan the instructions in the block and look for hazards. + for (Instruction &I : *BB) { + if (auto *Phi = dyn_cast(&I)) { + Type *PhiTy = Phi->getType(); + // Check that this PHI type is allowed. + if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && + !PhiTy->isPointerTy()) { + ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) + << "loop control flow is not understood by vectorizer"); + DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); + return false; + } + + // If this PHINode is not in the header block, then we know that we + // can convert it to select during if-conversion. No need to check if + // the PHIs in this block are induction or reduction variables. + if (BB != Header) { + // Check that this instruction has no outside users or is an + // identified reduction value with an outside user. + if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) + continue; + ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi) + << "value could not be identified as " + "an induction or reduction variable"); + return false; + } + + // We only allow if-converted PHIs with exactly two incoming values. + if (Phi->getNumIncomingValues() != 2) { + ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) + << "control flow not understood by vectorizer"); + DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); + return false; + } + + RecurrenceDescriptor RedDes; + if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, + DT)) { + if (RedDes.hasUnsafeAlgebra()) + Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); + AllowedExit.insert(RedDes.getLoopExitInstr()); + Reductions[Phi] = RedDes; + continue; + } + + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { + addInductionPhi(Phi, ID, AllowedExit); + if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) + Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); + continue; + } + + if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, + SinkAfter, DT)) { + FirstOrderRecurrences.insert(Phi); + continue; + } + + // As a last resort, coerce the PHI to a AddRec expression + // and re-try classifying it a an induction PHI. + if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { + addInductionPhi(Phi, ID, AllowedExit); + continue; + } + + ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi) + << "value that could not be identified as " + "reduction is used outside the loop"); + DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); + return false; + } // end of PHI handling + + // We handle calls that: + // * Are debug info intrinsics. + // * Have a mapping to an IR intrinsic. + // * Have a vector version available. + auto *CI = dyn_cast(&I); + if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && + !isa(CI) && + !(CI->getCalledFunction() && TLI && + TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { + ORE->emit(createMissedAnalysis("CantVectorizeCall", CI) + << "call instruction cannot be vectorized"); + DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); + return false; + } + + // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the + // second argument is the same (i.e. loop invariant) + if (CI && hasVectorInstrinsicScalarOpd( + getVectorIntrinsicIDForCall(CI, TLI), 1)) { + auto *SE = PSE.getSE(); + if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { + ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI) + << "intrinsic instruction cannot be vectorized"); + DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); + return false; + } + } + + // Check that the instruction return type is vectorizable. + // Also, we can't vectorize extractelement instructions. + if ((!VectorType::isValidElementType(I.getType()) && + !I.getType()->isVoidTy()) || + isa(I)) { + ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I) + << "instruction return type cannot be vectorized"); + DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); + return false; + } + + // Check that the stored type is vectorizable. + if (auto *ST = dyn_cast(&I)) { + Type *T = ST->getValueOperand()->getType(); + if (!VectorType::isValidElementType(T)) { + ORE->emit(createMissedAnalysis("CantVectorizeStore", ST) + << "store instruction cannot be vectorized"); + return false; + } + + // FP instructions can allow unsafe algebra, thus vectorizable by + // non-IEEE-754 compliant SIMD units. + // This applies to floating-point math operations and calls, not memory + // operations, shuffles, or casts, as they don't change precision or + // semantics. + } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && + !I.isFast()) { + DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); + Hints->setPotentiallyUnsafe(); + } + + // Reduction instructions are allowed to have exit users. + // All other instructions must not have external users. + if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { + ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I) + << "value cannot be used outside the loop"); + return false; + } + } // next instr. + } + + if (!PrimaryInduction) { + DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); + if (Inductions.empty()) { + ORE->emit(createMissedAnalysis("NoInductionVariable") + << "loop induction variable could not be identified"); + return false; + } + } + + // Now we know the widest induction type, check if our found induction + // is the same size. If it's not, unset it here and InnerLoopVectorizer + // will create another. + if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) + PrimaryInduction = nullptr; + + return true; +} + +int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { + const ValueToValueMap &Strides = + getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); + + int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); + if (Stride == 1 || Stride == -1) + return Stride; + return 0; +} + +bool LoopVectorizationLegality::isUniform(Value *V) { + return LAI->isUniform(V); +} + +bool LoopVectorizationLegality::canVectorizeWithIfConvert() { + if (!EnableIfConversion) { + ORE->emit(createMissedAnalysis("IfConversionDisabled") + << "if-conversion is disabled"); + return false; + } + + assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); + + // A list of pointers that we can safely read and write to. + SmallPtrSet SafePointes; + + // Collect safe addresses. + for (BasicBlock *BB : TheLoop->blocks()) { + if (blockNeedsPredication(BB)) + continue; + + for (Instruction &I : *BB) + if (auto *Ptr = getLoadStorePointerOperand(&I)) + SafePointes.insert(Ptr); + } + + // Collect the blocks that need predication. + BasicBlock *Header = TheLoop->getHeader(); + for (BasicBlock *BB : TheLoop->blocks()) { + // We don't support switch statements inside loops. + if (!isa(BB->getTerminator())) { + ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator()) + << "loop contains a switch statement"); + return false; + } + + // We must be able to predicate all blocks that need to be predicated. + if (blockNeedsPredication(BB)) { + if (!blockCanBePredicated(BB, SafePointes)) { + ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) + << "control flow cannot be substituted for a select"); + return false; + } + } else if (BB != Header && !canIfConvertPHINodes(BB)) { + ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) + << "control flow cannot be substituted for a select"); + return false; + } + } + + // We can if-convert this loop. + return true; +} + +bool LoopVectorizationLegality::canVectorize() { + // Store the result and return it at the end instead of exiting early, in case + // allowExtraAnalysis is used to report multiple reasons for not vectorizing. + bool Result = true; + + bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); + // We must have a loop in canonical form. Loops with indirectbr in them cannot + // be canonicalized. + if (!TheLoop->getLoopPreheader()) { + DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n"); + ORE->emit(createMissedAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by vectorizer"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // FIXME: The code is currently dead, since the loop gets sent to + // LoopVectorizationLegality is already an innermost loop. + // + // We can only vectorize innermost loops. + if (!TheLoop->empty()) { + ORE->emit(createMissedAnalysis("NotInnermostLoop") + << "loop is not the innermost loop"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // We must have a single backedge. + if (TheLoop->getNumBackEdges() != 1) { + ORE->emit(createMissedAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by vectorizer"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // We must have a single exiting block. + if (!TheLoop->getExitingBlock()) { + ORE->emit(createMissedAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by vectorizer"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // We only handle bottom-tested loops, i.e. loop in which the condition is + // checked at the end of each iteration. With that we can assume that all + // instructions in the loop are executed the same number of times. + if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { + ORE->emit(createMissedAnalysis("CFGNotUnderstood") + << "loop control flow is not understood by vectorizer"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // We need to have a loop header. + DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() + << '\n'); + + // Check if we can if-convert non-single-bb loops. + unsigned NumBlocks = TheLoop->getNumBlocks(); + if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { + DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // Check if we can vectorize the instructions and CFG in this loop. + if (!canVectorizeInstrs()) { + DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // Go over each instruction and look at memory deps. + if (!canVectorizeMemory()) { + DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + DEBUG(dbgs() << "LV: We can vectorize this loop" + << (LAI->getRuntimePointerChecking()->Need + ? " (with a runtime bound check)" + : "") + << "!\n"); + + unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; + if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) + SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; + + if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { + ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks") + << "Too many SCEV assumptions need to be made and checked " + << "at runtime"); + DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); + if (DoExtraAnalysis) + Result = false; + else + return false; + } + + // Okay! We've done all the tests. If any have failed, return false. Otherwise + // we can vectorize, and at this point we don't have any other mem analysis + // which may limit our maximum vectorization factor, so just return true with + // no restrictions. + return Result; +} Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -51,6 +51,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/Utils/Local.h" Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -13,1131 +13,17 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/ADT/ScopeExit.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/MustExecute.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Pass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-utils" -bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, - SmallPtrSetImpl &Set) { - for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) - if (!Set.count(dyn_cast(*Use))) - return false; - return true; -} - -bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { - switch (Kind) { - default: - break; - case RK_IntegerAdd: - case RK_IntegerMult: - case RK_IntegerOr: - case RK_IntegerAnd: - case RK_IntegerXor: - case RK_IntegerMinMax: - return true; - } - return false; -} - -bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { - return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); -} - -bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { - switch (Kind) { - default: - break; - case RK_IntegerAdd: - case RK_IntegerMult: - case RK_FloatAdd: - case RK_FloatMult: - return true; - } - return false; -} - -/// Determines if Phi may have been type-promoted. If Phi has a single user -/// that ANDs the Phi with a type mask, return the user. RT is updated to -/// account for the narrower bit width represented by the mask, and the AND -/// instruction is added to CI. -static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &CI) { - if (!Phi->hasOneUse()) - return Phi; - - const APInt *M = nullptr; - Instruction *I, *J = cast(Phi->use_begin()->getUser()); - - // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT - // with a new integer type of the corresponding bit width. - if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) { - int32_t Bits = (*M + 1).exactLogBase2(); - if (Bits > 0) { - RT = IntegerType::get(Phi->getContext(), Bits); - Visited.insert(Phi); - CI.insert(J); - return J; - } - } - return Phi; -} - -/// Compute the minimal bit width needed to represent a reduction whose exit -/// instruction is given by Exit. -static std::pair computeRecurrenceType(Instruction *Exit, - DemandedBits *DB, - AssumptionCache *AC, - DominatorTree *DT) { - bool IsSigned = false; - const DataLayout &DL = Exit->getModule()->getDataLayout(); - uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); - - if (DB) { - // Use the demanded bits analysis to determine the bits that are live out - // of the exit instruction, rounding up to the nearest power of two. If the - // use of demanded bits results in a smaller bit width, we know the value - // must be positive (i.e., IsSigned = false), because if this were not the - // case, the sign bit would have been demanded. - auto Mask = DB->getDemandedBits(Exit); - MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); - } - - if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { - // If demanded bits wasn't able to limit the bit width, we can try to use - // value tracking instead. This can be the case, for example, if the value - // may be negative. - auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); - auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); - MaxBitWidth = NumTypeBits - NumSignBits; - KnownBits Bits = computeKnownBits(Exit, DL); - if (!Bits.isNonNegative()) { - // If the value is not known to be non-negative, we set IsSigned to true, - // meaning that we will use sext instructions instead of zext - // instructions to restore the original type. - IsSigned = true; - if (!Bits.isNegative()) - // If the value is not known to be negative, we don't known what the - // upper bit is, and therefore, we don't know what kind of extend we - // will need. In this case, just increase the bit width by one bit and - // use sext. - ++MaxBitWidth; - } - } - if (!isPowerOf2_64(MaxBitWidth)) - MaxBitWidth = NextPowerOf2(MaxBitWidth); - - return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), - IsSigned); -} - -/// Collect cast instructions that can be ignored in the vectorizer's cost -/// model, given a reduction exit value and the minimal type in which the -/// reduction can be represented. -static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, - Type *RecurrenceType, - SmallPtrSetImpl &Casts) { - - SmallVector Worklist; - SmallPtrSet Visited; - Worklist.push_back(Exit); - - while (!Worklist.empty()) { - Instruction *Val = Worklist.pop_back_val(); - Visited.insert(Val); - if (auto *Cast = dyn_cast(Val)) - if (Cast->getSrcTy() == RecurrenceType) { - // If the source type of a cast instruction is equal to the recurrence - // type, it will be eliminated, and should be ignored in the vectorizer - // cost model. - Casts.insert(Cast); - continue; - } - - // Add all operands to the work list if they are loop-varying values that - // we haven't yet visited. - for (Value *O : cast(Val)->operands()) - if (auto *I = dyn_cast(O)) - if (TheLoop->contains(I) && !Visited.count(I)) - Worklist.push_back(I); - } -} - -bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, - Loop *TheLoop, bool HasFunNoNaNAttr, - RecurrenceDescriptor &RedDes, - DemandedBits *DB, - AssumptionCache *AC, - DominatorTree *DT) { - if (Phi->getNumIncomingValues() != 2) - return false; - - // Reduction variables are only found in the loop header block. - if (Phi->getParent() != TheLoop->getHeader()) - return false; - - // Obtain the reduction start value from the value that comes from the loop - // preheader. - Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); - - // ExitInstruction is the single value which is used outside the loop. - // We only allow for a single reduction value to be used outside the loop. - // This includes users of the reduction, variables (which form a cycle - // which ends in the phi node). - Instruction *ExitInstruction = nullptr; - // Indicates that we found a reduction operation in our scan. - bool FoundReduxOp = false; - - // We start with the PHI node and scan for all of the users of this - // instruction. All users must be instructions that can be used as reduction - // variables (such as ADD). We must have a single out-of-block user. The cycle - // must include the original PHI. - bool FoundStartPHI = false; - - // To recognize min/max patterns formed by a icmp select sequence, we store - // the number of instruction we saw from the recognized min/max pattern, - // to make sure we only see exactly the two instructions. - unsigned NumCmpSelectPatternInst = 0; - InstDesc ReduxDesc(false, nullptr); - - // Data used for determining if the recurrence has been type-promoted. - Type *RecurrenceType = Phi->getType(); - SmallPtrSet CastInsts; - Instruction *Start = Phi; - bool IsSigned = false; - - SmallPtrSet VisitedInsts; - SmallVector Worklist; - - // Return early if the recurrence kind does not match the type of Phi. If the - // recurrence kind is arithmetic, we attempt to look through AND operations - // resulting from the type promotion performed by InstCombine. Vector - // operations are not limited to the legal integer widths, so we may be able - // to evaluate the reduction in the narrower width. - if (RecurrenceType->isFloatingPointTy()) { - if (!isFloatingPointRecurrenceKind(Kind)) - return false; - } else { - if (!isIntegerRecurrenceKind(Kind)) - return false; - if (isArithmeticRecurrenceKind(Kind)) - Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); - } - - Worklist.push_back(Start); - VisitedInsts.insert(Start); - - // A value in the reduction can be used: - // - By the reduction: - // - Reduction operation: - // - One use of reduction value (safe). - // - Multiple use of reduction value (not safe). - // - PHI: - // - All uses of the PHI must be the reduction (safe). - // - Otherwise, not safe. - // - By instructions outside of the loop (safe). - // * One value may have several outside users, but all outside - // uses must be of the same value. - // - By an instruction that is not part of the reduction (not safe). - // This is either: - // * An instruction type other than PHI or the reduction operation. - // * A PHI in the header other than the initial PHI. - while (!Worklist.empty()) { - Instruction *Cur = Worklist.back(); - Worklist.pop_back(); - - // No Users. - // If the instruction has no users then this is a broken chain and can't be - // a reduction variable. - if (Cur->use_empty()) - return false; - - bool IsAPhi = isa(Cur); - - // A header PHI use other than the original PHI. - if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) - return false; - - // Reductions of instructions such as Div, and Sub is only possible if the - // LHS is the reduction variable. - if (!Cur->isCommutative() && !IsAPhi && !isa(Cur) && - !isa(Cur) && !isa(Cur) && - !VisitedInsts.count(dyn_cast(Cur->getOperand(0)))) - return false; - - // Any reduction instruction must be of one of the allowed kinds. We ignore - // the starting value (the Phi or an AND instruction if the Phi has been - // type-promoted). - if (Cur != Start) { - ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); - if (!ReduxDesc.isRecurrence()) - return false; - } - - // A reduction operation must only have one use of the reduction value. - if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && - hasMultipleUsesOf(Cur, VisitedInsts)) - return false; - - // All inputs to a PHI node must be a reduction value. - if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) - return false; - - if (Kind == RK_IntegerMinMax && - (isa(Cur) || isa(Cur))) - ++NumCmpSelectPatternInst; - if (Kind == RK_FloatMinMax && (isa(Cur) || isa(Cur))) - ++NumCmpSelectPatternInst; - - // Check whether we found a reduction operator. - FoundReduxOp |= !IsAPhi && Cur != Start; - - // Process users of current instruction. Push non-PHI nodes after PHI nodes - // onto the stack. This way we are going to have seen all inputs to PHI - // nodes once we get to them. - SmallVector NonPHIs; - SmallVector PHIs; - for (User *U : Cur->users()) { - Instruction *UI = cast(U); - - // Check if we found the exit user. - BasicBlock *Parent = UI->getParent(); - if (!TheLoop->contains(Parent)) { - // If we already know this instruction is used externally, move on to - // the next user. - if (ExitInstruction == Cur) - continue; - - // Exit if you find multiple values used outside or if the header phi - // node is being used. In this case the user uses the value of the - // previous iteration, in which case we would loose "VF-1" iterations of - // the reduction operation if we vectorize. - if (ExitInstruction != nullptr || Cur == Phi) - return false; - - // The instruction used by an outside user must be the last instruction - // before we feed back to the reduction phi. Otherwise, we loose VF-1 - // operations on the value. - if (!is_contained(Phi->operands(), Cur)) - return false; - - ExitInstruction = Cur; - continue; - } - - // Process instructions only once (termination). Each reduction cycle - // value must only be used once, except by phi nodes and min/max - // reductions which are represented as a cmp followed by a select. - InstDesc IgnoredVal(false, nullptr); - if (VisitedInsts.insert(UI).second) { - if (isa(UI)) - PHIs.push_back(UI); - else - NonPHIs.push_back(UI); - } else if (!isa(UI) && - ((!isa(UI) && !isa(UI) && - !isa(UI)) || - !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())) - return false; - - // Remember that we completed the cycle. - if (UI == Phi) - FoundStartPHI = true; - } - Worklist.append(PHIs.begin(), PHIs.end()); - Worklist.append(NonPHIs.begin(), NonPHIs.end()); - } - - // This means we have seen one but not the other instruction of the - // pattern or more than just a select and cmp. - if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && - NumCmpSelectPatternInst != 2) - return false; - - if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) - return false; - - if (Start != Phi) { - // If the starting value is not the same as the phi node, we speculatively - // looked through an 'and' instruction when evaluating a potential - // arithmetic reduction to determine if it may have been type-promoted. - // - // We now compute the minimal bit width that is required to represent the - // reduction. If this is the same width that was indicated by the 'and', we - // can represent the reduction in the smaller type. The 'and' instruction - // will be eliminated since it will essentially be a cast instruction that - // can be ignore in the cost model. If we compute a different type than we - // did when evaluating the 'and', the 'and' will not be eliminated, and we - // will end up with different kinds of operations in the recurrence - // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is - // the case. - // - // The vectorizer relies on InstCombine to perform the actual - // type-shrinking. It does this by inserting instructions to truncate the - // exit value of the reduction to the width indicated by RecurrenceType and - // then extend this value back to the original width. If IsSigned is false, - // a 'zext' instruction will be generated; otherwise, a 'sext' will be - // used. - // - // TODO: We should not rely on InstCombine to rewrite the reduction in the - // smaller type. We should just generate a correctly typed expression - // to begin with. - Type *ComputedType; - std::tie(ComputedType, IsSigned) = - computeRecurrenceType(ExitInstruction, DB, AC, DT); - if (ComputedType != RecurrenceType) - return false; - - // The recurrence expression will be represented in a narrower type. If - // there are any cast instructions that will be unnecessary, collect them - // in CastInsts. Note that the 'and' instruction was already included in - // this list. - // - // TODO: A better way to represent this may be to tag in some way all the - // instructions that are a part of the reduction. The vectorizer cost - // model could then apply the recurrence type to these instructions, - // without needing a white list of instructions to ignore. - collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); - } - - // We found a reduction var if we have reached the original phi node and we - // only have a single instruction with out-of-loop users. - - // The ExitInstruction(Instruction which is allowed to have out-of-loop users) - // is saved as part of the RecurrenceDescriptor. - - // Save the description of this reduction variable. - RecurrenceDescriptor RD( - RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(), - ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts); - RedDes = RD; - - return true; -} - -/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction -/// pattern corresponding to a min(X, Y) or max(X, Y). -RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { - - assert((isa(I) || isa(I) || isa(I)) && - "Expect a select instruction"); - Instruction *Cmp = nullptr; - SelectInst *Select = nullptr; - - // We must handle the select(cmp()) as a single instruction. Advance to the - // select. - if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) { - if (!Cmp->hasOneUse() || !(Select = dyn_cast(*I->user_begin()))) - return InstDesc(false, I); - return InstDesc(Select, Prev.getMinMaxKind()); - } - - // Only handle single use cases for now. - if (!(Select = dyn_cast(I))) - return InstDesc(false, I); - if (!(Cmp = dyn_cast(I->getOperand(0))) && - !(Cmp = dyn_cast(I->getOperand(0)))) - return InstDesc(false, I); - if (!Cmp->hasOneUse()) - return InstDesc(false, I); - - Value *CmpLeft; - Value *CmpRight; - - // Look for a min/max pattern. - if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_UIntMin); - else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_UIntMax); - else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_SIntMax); - else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_SIntMin); - else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_FloatMin); - else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_FloatMax); - else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_FloatMin); - else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) - return InstDesc(Select, MRK_FloatMax); - - return InstDesc(false, I); -} - -RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, - InstDesc &Prev, bool HasFunNoNaNAttr) { - bool FP = I->getType()->isFloatingPointTy(); - Instruction *UAI = Prev.getUnsafeAlgebraInst(); - if (!UAI && FP && !I->isFast()) - UAI = I; // Found an unsafe (unvectorizable) algebra instruction. - - switch (I->getOpcode()) { - default: - return InstDesc(false, I); - case Instruction::PHI: - return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst()); - case Instruction::Sub: - case Instruction::Add: - return InstDesc(Kind == RK_IntegerAdd, I); - case Instruction::Mul: - return InstDesc(Kind == RK_IntegerMult, I); - case Instruction::And: - return InstDesc(Kind == RK_IntegerAnd, I); - case Instruction::Or: - return InstDesc(Kind == RK_IntegerOr, I); - case Instruction::Xor: - return InstDesc(Kind == RK_IntegerXor, I); - case Instruction::FMul: - return InstDesc(Kind == RK_FloatMult, I, UAI); - case Instruction::FSub: - case Instruction::FAdd: - return InstDesc(Kind == RK_FloatAdd, I, UAI); - case Instruction::FCmp: - case Instruction::ICmp: - case Instruction::Select: - if (Kind != RK_IntegerMinMax && - (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) - return InstDesc(false, I); - return isMinMaxSelectCmpPattern(I, Prev); - } -} - -bool RecurrenceDescriptor::hasMultipleUsesOf( - Instruction *I, SmallPtrSetImpl &Insts) { - unsigned NumUses = 0; - for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; - ++Use) { - if (Insts.count(dyn_cast(*Use))) - ++NumUses; - if (NumUses > 1) - return true; - } - - return false; -} -bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, - RecurrenceDescriptor &RedDes, - DemandedBits *DB, AssumptionCache *AC, - DominatorTree *DT) { - - BasicBlock *Header = TheLoop->getHeader(); - Function &F = *Header->getParent(); - bool HasFunNoNaNAttr = - F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - - if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, - DB, AC, DT)) { - DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); - return true; - } - if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, - AC, DT)) { - DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n"); - return true; - } - // Not a reduction of known type. - return false; -} - -bool RecurrenceDescriptor::isFirstOrderRecurrence( - PHINode *Phi, Loop *TheLoop, - DenseMap &SinkAfter, DominatorTree *DT) { - - // Ensure the phi node is in the loop header and has two incoming values. - if (Phi->getParent() != TheLoop->getHeader() || - Phi->getNumIncomingValues() != 2) - return false; - - // Ensure the loop has a preheader and a single latch block. The loop - // vectorizer will need the latch to set up the next iteration of the loop. - auto *Preheader = TheLoop->getLoopPreheader(); - auto *Latch = TheLoop->getLoopLatch(); - if (!Preheader || !Latch) - return false; - - // Ensure the phi node's incoming blocks are the loop preheader and latch. - if (Phi->getBasicBlockIndex(Preheader) < 0 || - Phi->getBasicBlockIndex(Latch) < 0) - return false; - - // Get the previous value. The previous value comes from the latch edge while - // the initial value comes form the preheader edge. - auto *Previous = dyn_cast(Phi->getIncomingValueForBlock(Latch)); - if (!Previous || !TheLoop->contains(Previous) || isa(Previous) || - SinkAfter.count(Previous)) // Cannot rely on dominance due to motion. - return false; - - // Ensure every user of the phi node is dominated by the previous value. - // The dominance requirement ensures the loop vectorizer will not need to - // vectorize the initial value prior to the first iteration of the loop. - // TODO: Consider extending this sinking to handle other kinds of instructions - // and expressions, beyond sinking a single cast past Previous. - if (Phi->hasOneUse()) { - auto *I = Phi->user_back(); - if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() && - DT->dominates(Previous, I->user_back())) { - if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking. - SinkAfter[I] = Previous; - return true; - } - } - - for (User *U : Phi->users()) - if (auto *I = dyn_cast(U)) { - if (!DT->dominates(Previous, I)) - return false; - } - - return true; -} - -/// This function returns the identity element (or neutral element) for -/// the operation K. -Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K, - Type *Tp) { - switch (K) { - case RK_IntegerXor: - case RK_IntegerAdd: - case RK_IntegerOr: - // Adding, Xoring, Oring zero to a number does not change it. - return ConstantInt::get(Tp, 0); - case RK_IntegerMult: - // Multiplying a number by 1 does not change it. - return ConstantInt::get(Tp, 1); - case RK_IntegerAnd: - // AND-ing a number with an all-1 value does not change it. - return ConstantInt::get(Tp, -1, true); - case RK_FloatMult: - // Multiplying a number by 1 does not change it. - return ConstantFP::get(Tp, 1.0L); - case RK_FloatAdd: - // Adding zero to a number does not change it. - return ConstantFP::get(Tp, 0.0L); - default: - llvm_unreachable("Unknown recurrence kind"); - } -} - -/// This function translates the recurrence kind to an LLVM binary operator. -unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) { - switch (Kind) { - case RK_IntegerAdd: - return Instruction::Add; - case RK_IntegerMult: - return Instruction::Mul; - case RK_IntegerOr: - return Instruction::Or; - case RK_IntegerAnd: - return Instruction::And; - case RK_IntegerXor: - return Instruction::Xor; - case RK_FloatMult: - return Instruction::FMul; - case RK_FloatAdd: - return Instruction::FAdd; - case RK_IntegerMinMax: - return Instruction::ICmp; - case RK_FloatMinMax: - return Instruction::FCmp; - default: - llvm_unreachable("Unknown recurrence operation"); - } -} - -Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, - MinMaxRecurrenceKind RK, - Value *Left, Value *Right) { - CmpInst::Predicate P = CmpInst::ICMP_NE; - switch (RK) { - default: - llvm_unreachable("Unknown min/max recurrence kind"); - case MRK_UIntMin: - P = CmpInst::ICMP_ULT; - break; - case MRK_UIntMax: - P = CmpInst::ICMP_UGT; - break; - case MRK_SIntMin: - P = CmpInst::ICMP_SLT; - break; - case MRK_SIntMax: - P = CmpInst::ICMP_SGT; - break; - case MRK_FloatMin: - P = CmpInst::FCMP_OLT; - break; - case MRK_FloatMax: - P = CmpInst::FCMP_OGT; - break; - } - - // We only match FP sequences that are 'fast', so we can unconditionally - // set it on any generated instructions. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - FastMathFlags FMF; - FMF.setFast(); - Builder.setFastMathFlags(FMF); - - Value *Cmp; - if (RK == MRK_FloatMin || RK == MRK_FloatMax) - Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); - else - Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); - - Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); - return Select; -} - -InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, - const SCEV *Step, BinaryOperator *BOp, - SmallVectorImpl *Casts) - : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { - assert(IK != IK_NoInduction && "Not an induction"); - - // Start value type should match the induction kind and the value - // itself should not be null. - assert(StartValue && "StartValue is null"); - assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && - "StartValue is not a pointer for pointer induction"); - assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && - "StartValue is not an integer for integer induction"); - - // Check the Step Value. It should be non-zero integer value. - assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && - "Step value is zero"); - - assert((IK != IK_PtrInduction || getConstIntStepValue()) && - "Step value should be constant for pointer induction"); - assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && - "StepValue is not an integer"); - - assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && - "StepValue is not FP for FpInduction"); - assert((IK != IK_FpInduction || (InductionBinOp && - (InductionBinOp->getOpcode() == Instruction::FAdd || - InductionBinOp->getOpcode() == Instruction::FSub))) && - "Binary opcode should be specified for FP induction"); - - if (Casts) { - for (auto &Inst : *Casts) { - RedundantCasts.push_back(Inst); - } - } -} - -int InductionDescriptor::getConsecutiveDirection() const { - ConstantInt *ConstStep = getConstIntStepValue(); - if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne())) - return ConstStep->getSExtValue(); - return 0; -} - -ConstantInt *InductionDescriptor::getConstIntStepValue() const { - if (isa(Step)) - return dyn_cast(cast(Step)->getValue()); - return nullptr; -} - -Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, - ScalarEvolution *SE, - const DataLayout& DL) const { - - SCEVExpander Exp(*SE, DL, "induction"); - assert(Index->getType() == Step->getType() && - "Index type does not match StepValue type"); - switch (IK) { - case IK_IntInduction: { - assert(Index->getType() == StartValue->getType() && - "Index type does not match StartValue type"); - - // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution - // and calculate (Start + Index * Step) for all cases, without - // special handling for "isOne" and "isMinusOne". - // But in the real life the result code getting worse. We mix SCEV - // expressions and ADD/SUB operations and receive redundant - // intermediate values being calculated in different ways and - // Instcombine is unable to reduce them all. - - if (getConstIntStepValue() && - getConstIntStepValue()->isMinusOne()) - return B.CreateSub(StartValue, Index); - if (getConstIntStepValue() && - getConstIntStepValue()->isOne()) - return B.CreateAdd(StartValue, Index); - const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue), - SE->getMulExpr(Step, SE->getSCEV(Index))); - return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint()); - } - case IK_PtrInduction: { - assert(isa(Step) && - "Expected constant step for pointer induction"); - const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step); - Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint()); - return B.CreateGEP(nullptr, StartValue, Index); - } - case IK_FpInduction: { - assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); - assert(InductionBinOp && - (InductionBinOp->getOpcode() == Instruction::FAdd || - InductionBinOp->getOpcode() == Instruction::FSub) && - "Original bin op should be defined for FP induction"); - - Value *StepValue = cast(Step)->getValue(); - - // Floating point operations had to be 'fast' to enable the induction. - FastMathFlags Flags; - Flags.setFast(); - - Value *MulExp = B.CreateFMul(StepValue, Index); - if (isa(MulExp)) - // We have to check, the MulExp may be a constant. - cast(MulExp)->setFastMathFlags(Flags); - - Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue, - MulExp, "induction"); - if (isa(BOp)) - cast(BOp)->setFastMathFlags(Flags); - - return BOp; - } - case IK_NoInduction: - return nullptr; - } - llvm_unreachable("invalid enum"); -} - -bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, - ScalarEvolution *SE, - InductionDescriptor &D) { - - // Here we only handle FP induction variables. - assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); - - if (TheLoop->getHeader() != Phi->getParent()) - return false; - - // The loop may have multiple entrances or multiple exits; we can analyze - // this phi if it has a unique entry value and a unique backedge value. - if (Phi->getNumIncomingValues() != 2) - return false; - Value *BEValue = nullptr, *StartValue = nullptr; - if (TheLoop->contains(Phi->getIncomingBlock(0))) { - BEValue = Phi->getIncomingValue(0); - StartValue = Phi->getIncomingValue(1); - } else { - assert(TheLoop->contains(Phi->getIncomingBlock(1)) && - "Unexpected Phi node in the loop"); - BEValue = Phi->getIncomingValue(1); - StartValue = Phi->getIncomingValue(0); - } - - BinaryOperator *BOp = dyn_cast(BEValue); - if (!BOp) - return false; - - Value *Addend = nullptr; - if (BOp->getOpcode() == Instruction::FAdd) { - if (BOp->getOperand(0) == Phi) - Addend = BOp->getOperand(1); - else if (BOp->getOperand(1) == Phi) - Addend = BOp->getOperand(0); - } else if (BOp->getOpcode() == Instruction::FSub) - if (BOp->getOperand(0) == Phi) - Addend = BOp->getOperand(1); - - if (!Addend) - return false; - - // The addend should be loop invariant - if (auto *I = dyn_cast(Addend)) - if (TheLoop->contains(I)) - return false; - - // FP Step has unknown SCEV - const SCEV *Step = SE->getUnknown(Addend); - D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); - return true; -} - -/// This function is called when we suspect that the update-chain of a phi node -/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, -/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime -/// predicate P under which the SCEV expression for the phi can be the -/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the -/// cast instructions that are involved in the update-chain of this induction. -/// A caller that adds the required runtime predicate can be free to drop these -/// cast instructions, and compute the phi using \p AR (instead of some scev -/// expression with casts). -/// -/// For example, without a predicate the scev expression can take the following -/// form: -/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) -/// -/// It corresponds to the following IR sequence: -/// %for.body: -/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] -/// %casted_phi = "ExtTrunc i64 %x" -/// %add = add i64 %casted_phi, %step -/// -/// where %x is given in \p PN, -/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, -/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of -/// several forms, for example, such as: -/// ExtTrunc1: %casted_phi = and %x, 2^n-1 -/// or: -/// ExtTrunc2: %t = shl %x, m -/// %casted_phi = ashr %t, m -/// -/// If we are able to find such sequence, we return the instructions -/// we found, namely %casted_phi and the instructions on its use-def chain up -/// to the phi (not including the phi). -static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, - const SCEVUnknown *PhiScev, - const SCEVAddRecExpr *AR, - SmallVectorImpl &CastInsts) { - - assert(CastInsts.empty() && "CastInsts is expected to be empty."); - auto *PN = cast(PhiScev->getValue()); - assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); - const Loop *L = AR->getLoop(); - - // Find any cast instructions that participate in the def-use chain of - // PhiScev in the loop. - // FORNOW/TODO: We currently expect the def-use chain to include only - // two-operand instructions, where one of the operands is an invariant. - // createAddRecFromPHIWithCasts() currently does not support anything more - // involved than that, so we keep the search simple. This can be - // extended/generalized as needed. - - auto getDef = [&](const Value *Val) -> Value * { - const BinaryOperator *BinOp = dyn_cast(Val); - if (!BinOp) - return nullptr; - Value *Op0 = BinOp->getOperand(0); - Value *Op1 = BinOp->getOperand(1); - Value *Def = nullptr; - if (L->isLoopInvariant(Op0)) - Def = Op1; - else if (L->isLoopInvariant(Op1)) - Def = Op0; - return Def; - }; - - // Look for the instruction that defines the induction via the - // loop backedge. - BasicBlock *Latch = L->getLoopLatch(); - if (!Latch) - return false; - Value *Val = PN->getIncomingValueForBlock(Latch); - if (!Val) - return false; - - // Follow the def-use chain until the induction phi is reached. - // If on the way we encounter a Value that has the same SCEV Expr as the - // phi node, we can consider the instructions we visit from that point - // as part of the cast-sequence that can be ignored. - bool InCastSequence = false; - auto *Inst = dyn_cast(Val); - while (Val != PN) { - // If we encountered a phi node other than PN, or if we left the loop, - // we bail out. - if (!Inst || !L->contains(Inst)) { - return false; - } - auto *AddRec = dyn_cast(PSE.getSCEV(Val)); - if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) - InCastSequence = true; - if (InCastSequence) { - // Only the last instruction in the cast sequence is expected to have - // uses outside the induction def-use chain. - if (!CastInsts.empty()) - if (!Inst->hasOneUse()) - return false; - CastInsts.push_back(Inst); - } - Val = getDef(Val); - if (!Val) - return false; - Inst = dyn_cast(Val); - } - - return InCastSequence; -} - -bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, - PredicatedScalarEvolution &PSE, - InductionDescriptor &D, - bool Assume) { - Type *PhiTy = Phi->getType(); - - // Handle integer and pointer inductions variables. - // Now we handle also FP induction but not trying to make a - // recurrent expression from the PHI node in-place. - - if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && - !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) - return false; - - if (PhiTy->isFloatingPointTy()) - return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); - - const SCEV *PhiScev = PSE.getSCEV(Phi); - const auto *AR = dyn_cast(PhiScev); - - // We need this expression to be an AddRecExpr. - if (Assume && !AR) - AR = PSE.getAsAddRec(Phi); - - if (!AR) { - DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); - return false; - } - - // Record any Cast instructions that participate in the induction update - const auto *SymbolicPhi = dyn_cast(PhiScev); - // If we started from an UnknownSCEV, and managed to build an addRecurrence - // only after enabling Assume with PSCEV, this means we may have encountered - // cast instructions that required adding a runtime check in order to - // guarantee the correctness of the AddRecurence respresentation of the - // induction. - if (PhiScev != AR && SymbolicPhi) { - SmallVector Casts; - if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) - return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); - } - - return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); -} - -bool InductionDescriptor::isInductionPHI( - PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, - InductionDescriptor &D, const SCEV *Expr, - SmallVectorImpl *CastsToIgnore) { - Type *PhiTy = Phi->getType(); - // We only handle integer and pointer inductions variables. - if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) - return false; - - // Check that the PHI is consecutive. - const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); - const SCEVAddRecExpr *AR = dyn_cast(PhiScev); - - if (!AR) { - DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); - return false; - } - - if (AR->getLoop() != TheLoop) { - // FIXME: We should treat this as a uniform. Unfortunately, we - // don't currently know how to handled uniform PHIs. - DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); - return false; - } - - Value *StartValue = - Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); - const SCEV *Step = AR->getStepRecurrence(*SE); - // Calculate the pointer stride and check if it is consecutive. - // The stride may be a constant or a loop invariant integer value. - const SCEVConstant *ConstStep = dyn_cast(Step); - if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) - return false; - - if (PhiTy->isIntegerTy()) { - D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, - CastsToIgnore); - return true; - } - - assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); - // Pointer induction should be a constant. - if (!ConstStep) - return false; - - ConstantInt *CV = ConstStep->getValue(); - Type *PointerElementType = PhiTy->getPointerElementType(); - // The pointer stride cannot be determined if the pointer element type is not - // sized. - if (!PointerElementType->isSized()) - return false; - - const DataLayout &DL = Phi->getModule()->getDataLayout(); - int64_t Size = static_cast(DL.getTypeAllocSize(PointerElementType)); - if (!Size) - return false; - - int64_t CVSize = CV->getSExtValue(); - if (CVSize % Size) - return false; - auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size, - true /* signed */); - D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); - return true; -} - bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { bool Changed = false; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -19,7 +19,7 @@ // This pass has three parts: // 1. The main loop pass that drives the different parts. // 2. LoopVectorizationLegality - A unit that checks for the legality -// of the vectorization. +// of the vectorization. [Moved out to Analysis] // 3. InnerLoopVectorizer - A unit that performs the actual // widening of instructions. // 4. LoopVectorizationCostModel - A unit that checks for the profitability @@ -76,12 +76,14 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopVectorizationLegality.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Utils/LoopUtils.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -121,7 +123,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include #include @@ -144,10 +145,6 @@ STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); -static cl::opt - EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, - cl::desc("Enable if-conversion during vectorization.")); - /// Loops with a known constant trip count below this number are vectorized only /// if no scalar iteration overheads are incurred. static cl::opt TinyTripCountVectorThreshold( @@ -183,9 +180,6 @@ "force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")); -/// Maximum vectorization interleave count. -static const unsigned MaxInterleaveFactor = 16; - static cl::opt ForceTargetMaxScalarInterleaveFactor( "force-target-max-scalar-interleave", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's max interleave factor for " @@ -237,52 +231,6 @@ cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); -static cl::opt PragmaVectorizeMemoryCheckThreshold( - "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, - cl::desc("The maximum allowed number of runtime memory checks with a " - "vectorize(enable) pragma.")); - -static cl::opt VectorizeSCEVCheckThreshold( - "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, - cl::desc("The maximum number of SCEV checks allowed.")); - -static cl::opt PragmaVectorizeSCEVCheckThreshold( - "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, - cl::desc("The maximum number of SCEV checks allowed with a " - "vectorize(enable) pragma")); - -/// Create an analysis remark that explains why vectorization failed -/// -/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p -/// RemarkName is the identifier for the remark. If \p I is passed it is an -/// instruction that prevents vectorization. Otherwise \p TheLoop is used for -/// the location of the remark. \return the remark object that can be -/// streamed to. -static OptimizationRemarkAnalysis -createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, - Instruction *I = nullptr) { - Value *CodeRegion = TheLoop->getHeader(); - DebugLoc DL = TheLoop->getStartLoc(); - - if (I) { - CodeRegion = I->getParent(); - // If there is no debug location attached to the instruction, revert back to - // using the loop's. - if (I->getDebugLoc()) - DL = I->getDebugLoc(); - } - - OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); - R << "loop not vectorized: "; - return R; -} - -namespace { - -class LoopVectorizationRequirements; - -} // end anonymous namespace - /// A helper function for converting Scalar types to vector types. /// If the incoming type is void, we return void. If the VF is 1, we return /// the scalar type. @@ -1164,315 +1112,6 @@ } }; -/// Utility class for getting and setting loop vectorizer hints in the form -/// of loop metadata. -/// This class keeps a number of loop annotations locally (as member variables) -/// and can, upon request, write them back as metadata on the loop. It will -/// initially scan the loop for existing metadata, and will update the local -/// values based on information in the loop. -/// We cannot write all values to metadata, as the mere presence of some info, -/// for example 'force', means a decision has been made. So, we need to be -/// careful NOT to add them if the user hasn't specifically asked so. -class LoopVectorizeHints { - enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED }; - - /// Hint - associates name and validation with the hint value. - struct Hint { - const char *Name; - unsigned Value; // This may have to change for non-numeric values. - HintKind Kind; - - Hint(const char *Name, unsigned Value, HintKind Kind) - : Name(Name), Value(Value), Kind(Kind) {} - - bool validate(unsigned Val) { - switch (Kind) { - case HK_WIDTH: - return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; - case HK_UNROLL: - return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; - case HK_FORCE: - return (Val <= 1); - case HK_ISVECTORIZED: - return (Val==0 || Val==1); - } - return false; - } - }; - - /// Vectorization width. - Hint Width; - - /// Vectorization interleave factor. - Hint Interleave; - - /// Vectorization forced - Hint Force; - - /// Already Vectorized - Hint IsVectorized; - - /// Return the loop metadata prefix. - static StringRef Prefix() { return "llvm.loop."; } - - /// True if there is any unsafe math in the loop. - bool PotentiallyUnsafe = false; - -public: - enum ForceKind { - FK_Undefined = -1, ///< Not selected. - FK_Disabled = 0, ///< Forcing disabled. - FK_Enabled = 1, ///< Forcing enabled. - }; - - LoopVectorizeHints(const Loop *L, bool DisableInterleaving, - OptimizationRemarkEmitter &ORE) - : Width("vectorize.width", VectorizerParams::VectorizationFactor, - HK_WIDTH), - Interleave("interleave.count", DisableInterleaving, HK_UNROLL), - Force("vectorize.enable", FK_Undefined, HK_FORCE), - IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { - // Populate values with existing loop metadata. - getHintsFromMetadata(); - - // force-vector-interleave overrides DisableInterleaving. - if (VectorizerParams::isInterleaveForced()) - Interleave.Value = VectorizerParams::VectorizationInterleave; - - if (IsVectorized.Value != 1) - // If the vectorization width and interleaving count are both 1 then - // consider the loop to have been already vectorized because there's - // nothing more that we can do. - IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; - DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() - << "LV: Interleaving disabled by the pass manager\n"); - } - - /// Mark the loop L as already vectorized by setting the width to 1. - void setAlreadyVectorized() { - IsVectorized.Value = 1; - Hint Hints[] = {IsVectorized}; - writeHintsToMetadata(Hints); - } - - bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { - if (getForce() == LoopVectorizeHints::FK_Disabled) { - DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); - emitRemarkWithHints(); - return false; - } - - if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { - DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); - emitRemarkWithHints(); - return false; - } - - if (getIsVectorized() == 1) { - DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); - // FIXME: Add interleave.disable metadata. This will allow - // vectorize.disable to be used without disabling the pass and errors - // to differentiate between disabled vectorization and a width of 1. - ORE.emit([&]() { - return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), - "AllDisabled", L->getStartLoc(), - L->getHeader()) - << "loop not vectorized: vectorization and interleaving are " - "explicitly disabled, or the loop has already been " - "vectorized"; - }); - return false; - } - - return true; - } - - /// Dumps all the hint information. - void emitRemarkWithHints() const { - using namespace ore; - - ORE.emit([&]() { - if (Force.Value == LoopVectorizeHints::FK_Disabled) - return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", - TheLoop->getStartLoc(), - TheLoop->getHeader()) - << "loop not vectorized: vectorization is explicitly disabled"; - else { - OptimizationRemarkMissed R(LV_NAME, "MissedDetails", - TheLoop->getStartLoc(), - TheLoop->getHeader()); - R << "loop not vectorized"; - if (Force.Value == LoopVectorizeHints::FK_Enabled) { - R << " (Force=" << NV("Force", true); - if (Width.Value != 0) - R << ", Vector Width=" << NV("VectorWidth", Width.Value); - if (Interleave.Value != 0) - R << ", Interleave Count=" - << NV("InterleaveCount", Interleave.Value); - R << ")"; - } - return R; - } - }); - } - - unsigned getWidth() const { return Width.Value; } - unsigned getInterleave() const { return Interleave.Value; } - unsigned getIsVectorized() const { return IsVectorized.Value; } - enum ForceKind getForce() const { return (ForceKind)Force.Value; } - - /// \brief If hints are provided that force vectorization, use the AlwaysPrint - /// pass name to force the frontend to print the diagnostic. - const char *vectorizeAnalysisPassName() const { - if (getWidth() == 1) - return LV_NAME; - if (getForce() == LoopVectorizeHints::FK_Disabled) - return LV_NAME; - if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) - return LV_NAME; - return OptimizationRemarkAnalysis::AlwaysPrint; - } - - bool allowReordering() const { - // When enabling loop hints are provided we allow the vectorizer to change - // the order of operations that is given by the scalar loop. This is not - // enabled by default because can be unsafe or inefficient. For example, - // reordering floating-point operations will change the way round-off - // error accumulates in the loop. - return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; - } - - bool isPotentiallyUnsafe() const { - // Avoid FP vectorization if the target is unsure about proper support. - // This may be related to the SIMD unit in the target not handling - // IEEE 754 FP ops properly, or bad single-to-double promotions. - // Otherwise, a sequence of vectorized loops, even without reduction, - // could lead to different end results on the destination vectors. - return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; - } - - void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } - -private: - /// Find hints specified in the loop metadata and update local values. - void getHintsFromMetadata() { - MDNode *LoopID = TheLoop->getLoopID(); - if (!LoopID) - return; - - // First operand should refer to the loop id itself. - assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); - assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); - - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - const MDString *S = nullptr; - SmallVector Args; - - // The expected hint is either a MDString or a MDNode with the first - // operand a MDString. - if (const MDNode *MD = dyn_cast(LoopID->getOperand(i))) { - if (!MD || MD->getNumOperands() == 0) - continue; - S = dyn_cast(MD->getOperand(0)); - for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) - Args.push_back(MD->getOperand(i)); - } else { - S = dyn_cast(LoopID->getOperand(i)); - assert(Args.size() == 0 && "too many arguments for MDString"); - } - - if (!S) - continue; - - // Check if the hint starts with the loop metadata prefix. - StringRef Name = S->getString(); - if (Args.size() == 1) - setHint(Name, Args[0]); - } - } - - /// Checks string hint with one operand and set value if valid. - void setHint(StringRef Name, Metadata *Arg) { - if (!Name.startswith(Prefix())) - return; - Name = Name.substr(Prefix().size(), StringRef::npos); - - const ConstantInt *C = mdconst::dyn_extract(Arg); - if (!C) - return; - unsigned Val = C->getZExtValue(); - - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized}; - for (auto H : Hints) { - if (Name == H->Name) { - if (H->validate(Val)) - H->Value = Val; - else - DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); - break; - } - } - } - - /// Create a new hint from name / value pair. - MDNode *createHintMetadata(StringRef Name, unsigned V) const { - LLVMContext &Context = TheLoop->getHeader()->getContext(); - Metadata *MDs[] = {MDString::get(Context, Name), - ConstantAsMetadata::get( - ConstantInt::get(Type::getInt32Ty(Context), V))}; - return MDNode::get(Context, MDs); - } - - /// Matches metadata with hint name. - bool matchesHintMetadataName(MDNode *Node, ArrayRef HintTypes) { - MDString *Name = dyn_cast(Node->getOperand(0)); - if (!Name) - return false; - - for (auto H : HintTypes) - if (Name->getString().endswith(H.Name)) - return true; - return false; - } - - /// Sets current hints into loop metadata, keeping other values intact. - void writeHintsToMetadata(ArrayRef HintTypes) { - if (HintTypes.empty()) - return; - - // Reserve the first element to LoopID (see below). - SmallVector MDs(1); - // If the loop already has metadata, then ignore the existing operands. - MDNode *LoopID = TheLoop->getLoopID(); - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - MDNode *Node = cast(LoopID->getOperand(i)); - // If node in update list, ignore old value. - if (!matchesHintMetadataName(Node, HintTypes)) - MDs.push_back(Node); - } - } - - // Now, add the missing hints. - for (auto H : HintTypes) - MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); - - // Replace current metadata node with new one. - LLVMContext &Context = TheLoop->getHeader()->getContext(); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - - TheLoop->setLoopID(NewLoopID); - } - - /// The loop these hints belong to. - const Loop *TheLoop; - - /// Interface to emit optimization remarks. - OptimizationRemarkEmitter &ORE; -}; - } // end anonymous namespace static void emitMissedWarning(Function *F, Loop *L, @@ -1498,259 +1137,6 @@ namespace llvm { -/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and -/// to what vectorization factor. -/// This class does not look at the profitability of vectorization, only the -/// legality. This class has two main kinds of checks: -/// * Memory checks - The code in canVectorizeMemory checks if vectorization -/// will change the order of memory accesses in a way that will change the -/// correctness of the program. -/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory -/// checks for a number of different conditions, such as the availability of a -/// single induction variable, that all types are supported and vectorize-able, -/// etc. This code reflects the capabilities of InnerLoopVectorizer. -/// This class is also used by InnerLoopVectorizer for identifying -/// induction variable and the different reduction variables. -class LoopVectorizationLegality { -public: - LoopVectorizationLegality( - Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, - TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, - std::function *GetLAA, LoopInfo *LI, - OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, - LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) - : TheLoop(L), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA), - ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} - - /// ReductionList contains the reduction descriptors for all - /// of the reductions that were found in the loop. - using ReductionList = DenseMap; - - /// InductionList saves induction variables and maps them to the - /// induction descriptor. - using InductionList = MapVector; - - /// RecurrenceSet contains the phi nodes that are recurrences other than - /// inductions and reductions. - using RecurrenceSet = SmallPtrSet; - - /// Returns true if it is legal to vectorize this loop. - /// This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. - bool canVectorize(); - - /// Returns the primary induction variable. - PHINode *getPrimaryInduction() { return PrimaryInduction; } - - /// Returns the reduction variables found in the loop. - ReductionList *getReductionVars() { return &Reductions; } - - /// Returns the induction variables found in the loop. - InductionList *getInductionVars() { return &Inductions; } - - /// Return the first-order recurrences found in the loop. - RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; } - - /// Return the set of instructions to sink to handle first-order recurrences. - DenseMap &getSinkAfter() { return SinkAfter; } - - /// Returns the widest induction type. - Type *getWidestInductionType() { return WidestIndTy; } - - /// Returns True if V is a Phi node of an induction variable in this loop. - bool isInductionPhi(const Value *V); - - /// Returns True if V is a cast that is part of an induction def-use chain, - /// and had been proven to be redundant under a runtime guard (in other - /// words, the cast has the same SCEV expression as the induction phi). - bool isCastedInductionVariable(const Value *V); - - /// Returns True if V can be considered as an induction variable in this - /// loop. V can be the induction phi, or some redundant cast in the def-use - /// chain of the inducion phi. - bool isInductionVariable(const Value *V); - - /// Returns True if PN is a reduction variable in this loop. - bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } - - /// Returns True if Phi is a first-order recurrence in this loop. - bool isFirstOrderRecurrence(const PHINode *Phi); - - /// Return true if the block BB needs to be predicated in order for the loop - /// to be vectorized. - bool blockNeedsPredication(BasicBlock *BB); - - /// Check if this pointer is consecutive when vectorizing. This happens - /// when the last index of the GEP is the induction variable, or that the - /// pointer itself is an induction variable. - /// This check allows us to vectorize A[idx] into a wide load/store. - /// Returns: - /// 0 - Stride is unknown or non-consecutive. - /// 1 - Address is consecutive. - /// -1 - Address is consecutive, and decreasing. - /// NOTE: This method must only be used before modifying the original scalar - /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). - int isConsecutivePtr(Value *Ptr); - - /// Returns true if the value V is uniform within the loop. - bool isUniform(Value *V); - - /// Returns the information that we collected about runtime memory check. - const RuntimePointerChecking *getRuntimePointerChecking() const { - return LAI->getRuntimePointerChecking(); - } - - const LoopAccessInfo *getLAI() const { return LAI; } - - unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } - - uint64_t getMaxSafeRegisterWidth() const { - return LAI->getDepChecker().getMaxSafeRegisterWidth(); - } - - bool hasStride(Value *V) { return LAI->hasStride(V); } - - /// Returns true if vector representation of the instruction \p I - /// requires mask. - bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } - - unsigned getNumStores() const { return LAI->getNumStores(); } - unsigned getNumLoads() const { return LAI->getNumLoads(); } - - // Returns true if the NoNaN attribute is set on the function. - bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } - -private: - /// Check if a single basic block loop is vectorizable. - /// At this point we know that this is a loop with a constant trip count - /// and we only need to check individual instructions. - bool canVectorizeInstrs(); - - /// When we vectorize loops we may change the order in which - /// we read and write from memory. This method checks if it is - /// legal to vectorize the code, considering only memory constrains. - /// Returns true if the loop is vectorizable - bool canVectorizeMemory(); - - /// Return true if we can vectorize this loop using the IF-conversion - /// transformation. - bool canVectorizeWithIfConvert(); - - /// Return true if all of the instructions in the block can be speculatively - /// executed. \p SafePtrs is a list of addresses that are known to be legal - /// and we know that we can read from them without segfault. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); - - /// Updates the vectorization state by adding \p Phi to the inductions list. - /// This can set \p Phi as the main induction of the loop if \p Phi is a - /// better choice for the main induction than the existing one. - void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, - SmallPtrSetImpl &AllowedExit); - - /// Create an analysis remark that explains why vectorization failed - /// - /// \p RemarkName is the identifier for the remark. If \p I is passed it is - /// an instruction that prevents vectorization. Otherwise the loop is used - /// for the location of the remark. \return the remark object that can be - /// streamed to. - OptimizationRemarkAnalysis - createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const { - return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(), - RemarkName, TheLoop, I); - } - - /// \brief If an access has a symbolic strides, this maps the pointer value to - /// the stride symbol. - const ValueToValueMap *getSymbolicStrides() { - // FIXME: Currently, the set of symbolic strides is sometimes queried before - // it's collected. This happens from canVectorizeWithIfConvert, when the - // pointer is checked to reference consecutive elements suitable for a - // masked access. - return LAI ? &LAI->getSymbolicStrides() : nullptr; - } - - /// The loop that we evaluate. - Loop *TheLoop; - - /// A wrapper around ScalarEvolution used to add runtime SCEV checks. - /// Applies dynamic knowledge to simplify SCEV expressions in the context - /// of existing SCEV assumptions. The analysis will also add a minimal set - /// of new predicates if this is required to enable vectorization and - /// unrolling. - PredicatedScalarEvolution &PSE; - - /// Target Library Info. - TargetLibraryInfo *TLI; - - /// Dominator Tree. - DominatorTree *DT; - - // LoopAccess analysis. - std::function *GetLAA; - - // And the loop-accesses info corresponding to this loop. This pointer is - // null until canVectorizeMemory sets it up. - const LoopAccessInfo *LAI = nullptr; - - /// Interface to emit optimization remarks. - OptimizationRemarkEmitter *ORE; - - // --- vectorization state --- // - - /// Holds the primary induction variable. This is the counter of the - /// loop. - PHINode *PrimaryInduction = nullptr; - - /// Holds the reduction variables. - ReductionList Reductions; - - /// Holds all of the induction variables that we found in the loop. - /// Notice that inductions don't need to start at zero and that induction - /// variables can be pointers. - InductionList Inductions; - - /// Holds all the casts that participate in the update chain of the induction - /// variables, and that have been proven to be redundant (possibly under a - /// runtime guard). These casts can be ignored when creating the vectorized - /// loop body. - SmallPtrSet InductionCastsToIgnore; - - /// Holds the phi nodes that are first-order recurrences. - RecurrenceSet FirstOrderRecurrences; - - /// Holds instructions that need to sink past other instructions to handle - /// first-order recurrences. - DenseMap SinkAfter; - - /// Holds the widest induction type encountered. - Type *WidestIndTy = nullptr; - - /// Allowed outside users. This holds the induction and reduction - /// vars which can be accessed from outside the loop. - SmallPtrSet AllowedExit; - - /// Can we assume the absence of NaNs. - bool HasFunNoNaNAttr = false; - - /// Vectorization requirements that will go through late-evaluation. - LoopVectorizationRequirements *Requirements; - - /// Used to emit an analysis of any legality issues. - LoopVectorizeHints *Hints; - - /// The demanded bits analsyis is used to compute the minimum type size in - /// which a reduction can be computed. - DemandedBits *DB; - - /// The assumption cache analysis is used to compute the minimum type size in - /// which a reduction can be computed. - AssumptionCache *AC; - - /// While vectorizing these instructions we have to generate a - /// call to the appropriate masked intrinsic - SmallPtrSet MaskedOp; -}; - /// LoopVectorizationCostModel - estimates the expected speedups due to /// vectorization. /// In many cases vectorization is not profitable. This can happen because of @@ -2088,7 +1474,7 @@ /// \p RemarkName is the identifier for the remark. \return the remark object /// that can be streamed to. OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) { - return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(), + return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(), RemarkName, TheLoop); } @@ -2203,78 +1589,6 @@ } // end namespace llvm -namespace { - -/// \brief This holds vectorization requirements that must be verified late in -/// the process. The requirements are set by legalize and costmodel. Once -/// vectorization has been determined to be possible and profitable the -/// requirements can be verified by looking for metadata or compiler options. -/// For example, some loops require FP commutativity which is only allowed if -/// vectorization is explicitly specified or if the fast-math compiler option -/// has been provided. -/// Late evaluation of these requirements allows helpful diagnostics to be -/// composed that tells the user what need to be done to vectorize the loop. For -/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late -/// evaluation should be used only when diagnostics can generated that can be -/// followed by a non-expert user. -class LoopVectorizationRequirements { -public: - LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} - - void addUnsafeAlgebraInst(Instruction *I) { - // First unsafe algebra instruction. - if (!UnsafeAlgebraInst) - UnsafeAlgebraInst = I; - } - - void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } - - bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) { - const char *PassName = Hints.vectorizeAnalysisPassName(); - bool Failed = false; - if (UnsafeAlgebraInst && !Hints.allowReordering()) { - ORE.emit([&]() { - return OptimizationRemarkAnalysisFPCommute( - PassName, "CantReorderFPOps", - UnsafeAlgebraInst->getDebugLoc(), - UnsafeAlgebraInst->getParent()) - << "loop not vectorized: cannot prove it is safe to reorder " - "floating-point operations"; - }); - Failed = true; - } - - // Test if runtime memcheck thresholds are exceeded. - bool PragmaThresholdReached = - NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; - bool ThresholdReached = - NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; - if ((ThresholdReached && !Hints.allowReordering()) || - PragmaThresholdReached) { - ORE.emit([&]() { - return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", - L->getStartLoc(), - L->getHeader()) - << "loop not vectorized: cannot prove it is safe to reorder " - "memory operations"; - }); - DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); - Failed = true; - } - - return Failed; - } - -private: - unsigned NumRuntimePointerChecks = 0; - Instruction *UnsafeAlgebraInst = nullptr; - - /// Interface to emit optimization remarks. - OptimizationRemarkEmitter &ORE; -}; - -} // end anonymous namespace - static void addAcyclicInnerLoop(Loop &L, LoopInfo &LI, SmallVectorImpl &V) { if (L.empty()) { @@ -2682,20 +1996,6 @@ } } -int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { - const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : - ValueToValueMap(); - - int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false); - if (Stride == 1 || Stride == -1) - return Stride; - return 0; -} - -bool LoopVectorizationLegality::isUniform(Value *V) { - return LAI->isUniform(V); -} - Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { assert(V != Induction && "The new induction variable should not be used."); assert(!V->getType()->isVectorTy() && "Can't widen a vector"); @@ -4768,454 +4068,6 @@ assert(DT->verify(DominatorTree::VerificationLevel::Fast)); } -/// \brief Check whether it is safe to if-convert this phi node. -/// -/// Phi nodes with constant expressions that can trap are not safe to if -/// convert. -static bool canIfConvertPHINodes(BasicBlock *BB) { - for (PHINode &Phi : BB->phis()) { - for (Value *V : Phi.incoming_values()) - if (auto *C = dyn_cast(V)) - if (C->canTrap()) - return false; - } - return true; -} - -bool LoopVectorizationLegality::canVectorizeWithIfConvert() { - if (!EnableIfConversion) { - ORE->emit(createMissedAnalysis("IfConversionDisabled") - << "if-conversion is disabled"); - return false; - } - - assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); - - // A list of pointers that we can safely read and write to. - SmallPtrSet SafePointes; - - // Collect safe addresses. - for (BasicBlock *BB : TheLoop->blocks()) { - if (blockNeedsPredication(BB)) - continue; - - for (Instruction &I : *BB) - if (auto *Ptr = getLoadStorePointerOperand(&I)) - SafePointes.insert(Ptr); - } - - // Collect the blocks that need predication. - BasicBlock *Header = TheLoop->getHeader(); - for (BasicBlock *BB : TheLoop->blocks()) { - // We don't support switch statements inside loops. - if (!isa(BB->getTerminator())) { - ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator()) - << "loop contains a switch statement"); - return false; - } - - // We must be able to predicate all blocks that need to be predicated. - if (blockNeedsPredication(BB)) { - if (!blockCanBePredicated(BB, SafePointes)) { - ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) - << "control flow cannot be substituted for a select"); - return false; - } - } else if (BB != Header && !canIfConvertPHINodes(BB)) { - ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator()) - << "control flow cannot be substituted for a select"); - return false; - } - } - - // We can if-convert this loop. - return true; -} - -bool LoopVectorizationLegality::canVectorize() { - // Store the result and return it at the end instead of exiting early, in case - // allowExtraAnalysis is used to report multiple reasons for not vectorizing. - bool Result = true; - - bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); - // We must have a loop in canonical form. Loops with indirectbr in them cannot - // be canonicalized. - if (!TheLoop->getLoopPreheader()) { - DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n"); - ORE->emit(createMissedAnalysis("CFGNotUnderstood") - << "loop control flow is not understood by vectorizer"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // FIXME: The code is currently dead, since the loop gets sent to - // LoopVectorizationLegality is already an innermost loop. - // - // We can only vectorize innermost loops. - if (!TheLoop->empty()) { - ORE->emit(createMissedAnalysis("NotInnermostLoop") - << "loop is not the innermost loop"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // We must have a single backedge. - if (TheLoop->getNumBackEdges() != 1) { - ORE->emit(createMissedAnalysis("CFGNotUnderstood") - << "loop control flow is not understood by vectorizer"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // We must have a single exiting block. - if (!TheLoop->getExitingBlock()) { - ORE->emit(createMissedAnalysis("CFGNotUnderstood") - << "loop control flow is not understood by vectorizer"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // We only handle bottom-tested loops, i.e. loop in which the condition is - // checked at the end of each iteration. With that we can assume that all - // instructions in the loop are executed the same number of times. - if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - ORE->emit(createMissedAnalysis("CFGNotUnderstood") - << "loop control flow is not understood by vectorizer"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // We need to have a loop header. - DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() - << '\n'); - - // Check if we can if-convert non-single-bb loops. - unsigned NumBlocks = TheLoop->getNumBlocks(); - if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { - DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // Check if we can vectorize the instructions and CFG in this loop. - if (!canVectorizeInstrs()) { - DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // Go over each instruction and look at memory deps. - if (!canVectorizeMemory()) { - DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - DEBUG(dbgs() << "LV: We can vectorize this loop" - << (LAI->getRuntimePointerChecking()->Need - ? " (with a runtime bound check)" - : "") - << "!\n"); - - unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; - if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) - SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; - - if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { - ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks") - << "Too many SCEV assumptions need to be made and checked " - << "at runtime"); - DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); - if (DoExtraAnalysis) - Result = false; - else - return false; - } - - // Okay! We've done all the tests. If any have failed, return false. Otherwise - // we can vectorize, and at this point we don't have any other mem analysis - // which may limit our maximum vectorization factor, so just return true with - // no restrictions. - return Result; -} - -static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { - if (Ty->isPointerTy()) - return DL.getIntPtrType(Ty); - - // It is possible that char's or short's overflow when we ask for the loop's - // trip count, work around this by changing the type size. - if (Ty->getScalarSizeInBits() < 32) - return Type::getInt32Ty(Ty->getContext()); - - return Ty; -} - -static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { - Ty0 = convertPointerToIntegerType(DL, Ty0); - Ty1 = convertPointerToIntegerType(DL, Ty1); - if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) - return Ty0; - return Ty1; -} - -/// \brief Check that the instruction has outside loop users and is not an -/// identified reduction variable. -static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, - SmallPtrSetImpl &AllowedExit) { - // Reduction and Induction instructions are allowed to have exit users. All - // other instructions must not have external users. - if (!AllowedExit.count(Inst)) - // Check that all of the users of the loop are inside the BB. - for (User *U : Inst->users()) { - Instruction *UI = cast(U); - // This user may be a reduction exit value. - if (!TheLoop->contains(UI)) { - DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); - return true; - } - } - return false; -} - -void LoopVectorizationLegality::addInductionPhi( - PHINode *Phi, const InductionDescriptor &ID, - SmallPtrSetImpl &AllowedExit) { - Inductions[Phi] = ID; - - // In case this induction also comes with casts that we know we can ignore - // in the vectorized loop body, record them here. All casts could be recorded - // here for ignoring, but suffices to record only the first (as it is the - // only one that may bw used outside the cast sequence). - const SmallVectorImpl &Casts = ID.getCastInsts(); - if (!Casts.empty()) - InductionCastsToIgnore.insert(*Casts.begin()); - - Type *PhiTy = Phi->getType(); - const DataLayout &DL = Phi->getModule()->getDataLayout(); - - // Get the widest type. - if (!PhiTy->isFloatingPointTy()) { - if (!WidestIndTy) - WidestIndTy = convertPointerToIntegerType(DL, PhiTy); - else - WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); - } - - // Int inductions are special because we only allow one IV. - if (ID.getKind() == InductionDescriptor::IK_IntInduction && - ID.getConstIntStepValue() && - ID.getConstIntStepValue()->isOne() && - isa(ID.getStartValue()) && - cast(ID.getStartValue())->isNullValue()) { - - // Use the phi node with the widest type as induction. Use the last - // one if there are multiple (no good reason for doing this other - // than it is expedient). We've checked that it begins at zero and - // steps by one, so this is a canonical induction variable. - if (!PrimaryInduction || PhiTy == WidestIndTy) - PrimaryInduction = Phi; - } - - // Both the PHI node itself, and the "post-increment" value feeding - // back into the PHI node may have external users. - // We can allow those uses, except if the SCEVs we have for them rely - // on predicates that only hold within the loop, since allowing the exit - // currently means re-using this SCEV outside the loop. - if (PSE.getUnionPredicate().isAlwaysTrue()) { - AllowedExit.insert(Phi); - AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); - } - - DEBUG(dbgs() << "LV: Found an induction variable.\n"); -} - -bool LoopVectorizationLegality::canVectorizeInstrs() { - BasicBlock *Header = TheLoop->getHeader(); - - // Look for the attribute signaling the absence of NaNs. - Function &F = *Header->getParent(); - HasFunNoNaNAttr = - F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; - - // For each block in the loop. - for (BasicBlock *BB : TheLoop->blocks()) { - // Scan the instructions in the block and look for hazards. - for (Instruction &I : *BB) { - if (auto *Phi = dyn_cast(&I)) { - Type *PhiTy = Phi->getType(); - // Check that this PHI type is allowed. - if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && - !PhiTy->isPointerTy()) { - ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) - << "loop control flow is not understood by vectorizer"); - DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); - return false; - } - - // If this PHINode is not in the header block, then we know that we - // can convert it to select during if-conversion. No need to check if - // the PHIs in this block are induction or reduction variables. - if (BB != Header) { - // Check that this instruction has no outside users or is an - // identified reduction value with an outside user. - if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) - continue; - ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi) - << "value could not be identified as " - "an induction or reduction variable"); - return false; - } - - // We only allow if-converted PHIs with exactly two incoming values. - if (Phi->getNumIncomingValues() != 2) { - ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) - << "control flow not understood by vectorizer"); - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); - return false; - } - - RecurrenceDescriptor RedDes; - if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, - DT)) { - if (RedDes.hasUnsafeAlgebra()) - Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); - AllowedExit.insert(RedDes.getLoopExitInstr()); - Reductions[Phi] = RedDes; - continue; - } - - InductionDescriptor ID; - if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { - addInductionPhi(Phi, ID, AllowedExit); - if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) - Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); - continue; - } - - if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, - SinkAfter, DT)) { - FirstOrderRecurrences.insert(Phi); - continue; - } - - // As a last resort, coerce the PHI to a AddRec expression - // and re-try classifying it a an induction PHI. - if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { - addInductionPhi(Phi, ID, AllowedExit); - continue; - } - - ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi) - << "value that could not be identified as " - "reduction is used outside the loop"); - DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); - return false; - } // end of PHI handling - - // We handle calls that: - // * Are debug info intrinsics. - // * Have a mapping to an IR intrinsic. - // * Have a vector version available. - auto *CI = dyn_cast(&I); - if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && - !isa(CI) && - !(CI->getCalledFunction() && TLI && - TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { - ORE->emit(createMissedAnalysis("CantVectorizeCall", CI) - << "call instruction cannot be vectorized"); - DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); - return false; - } - - // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the - // second argument is the same (i.e. loop invariant) - if (CI && hasVectorInstrinsicScalarOpd( - getVectorIntrinsicIDForCall(CI, TLI), 1)) { - auto *SE = PSE.getSE(); - if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { - ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI) - << "intrinsic instruction cannot be vectorized"); - DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); - return false; - } - } - - // Check that the instruction return type is vectorizable. - // Also, we can't vectorize extractelement instructions. - if ((!VectorType::isValidElementType(I.getType()) && - !I.getType()->isVoidTy()) || - isa(I)) { - ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I) - << "instruction return type cannot be vectorized"); - DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); - return false; - } - - // Check that the stored type is vectorizable. - if (auto *ST = dyn_cast(&I)) { - Type *T = ST->getValueOperand()->getType(); - if (!VectorType::isValidElementType(T)) { - ORE->emit(createMissedAnalysis("CantVectorizeStore", ST) - << "store instruction cannot be vectorized"); - return false; - } - - // FP instructions can allow unsafe algebra, thus vectorizable by - // non-IEEE-754 compliant SIMD units. - // This applies to floating-point math operations and calls, not memory - // operations, shuffles, or casts, as they don't change precision or - // semantics. - } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && - !I.isFast()) { - DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); - Hints->setPotentiallyUnsafe(); - } - - // Reduction instructions are allowed to have exit users. - // All other instructions must not have external users. - if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { - ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I) - << "value cannot be used outside the loop"); - return false; - } - } // next instr. - } - - if (!PrimaryInduction) { - DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); - if (Inductions.empty()) { - ORE->emit(createMissedAnalysis("NoInductionVariable") - << "loop induction variable could not be identified"); - return false; - } - } - - // Now we know the widest induction type, check if our found induction - // is the same size. If it's not, unset it here and InnerLoopVectorizer - // will create another. - if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) - PrimaryInduction = nullptr; - - return true; -} - void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does @@ -8398,7 +7250,7 @@ if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" "attribute is used.\n"); - ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(), + ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "NoImplicitFloat", L) << "loop not vectorized due to NoImplicitFloat attribute"); emitMissedWarning(F, L, Hints, ORE); @@ -8413,7 +7265,7 @@ TTI->isFPVectorizationPotentiallyUnsafe()) { DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); ORE->emit( - createMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) + createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) << "loop not vectorized due to unsafe FP support."); emitMissedWarning(F, L, Hints, ORE); return false;