Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -346,10 +346,10 @@ /// \return The width of the largest scalar or vector register type. virtual unsigned getRegisterBitWidth(bool Vector) const; - /// \return The maximum unroll factor that the vectorizer should try to - /// perform for this target. This number depends on the level of parallelism - /// and the number of execution units in the CPU. - virtual unsigned getMaximumUnrollFactor() const; + /// \return The maximum interleave factor that the loop vectorizer should try + /// to perform for this target. This number depends on the level of + /// parallelism and the number of execution units in the CPU. + virtual unsigned getMaxVectorInterleaveFactor() const; /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. virtual unsigned Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -167,8 +167,8 @@ return PrevTTI->getRegisterBitWidth(Vector); } -unsigned TargetTransformInfo::getMaximumUnrollFactor() const { - return PrevTTI->getMaximumUnrollFactor(); +unsigned TargetTransformInfo::getMaxVectorInterleaveFactor() const { + return PrevTTI->getMaxVectorInterleaveFactor(); } unsigned TargetTransformInfo::getArithmeticInstrCost( @@ -565,7 +565,7 @@ return 32; } - unsigned getMaximumUnrollFactor() const override { + unsigned getMaxVectorInterleaveFactor() const override { return 1; } Index: lib/CodeGen/BasicTargetTransformInfo.cpp =================================================================== --- lib/CodeGen/BasicTargetTransformInfo.cpp +++ lib/CodeGen/BasicTargetTransformInfo.cpp @@ -101,7 +101,7 @@ /// @{ unsigned getNumberOfRegisters(bool Vector) const override; - unsigned 
getMaximumUnrollFactor() const override; + unsigned getMaxVectorInterleaveFactor() const override; unsigned getRegisterBitWidth(bool Vector) const override; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind, OperandValueProperties, @@ -285,7 +285,7 @@ return 32; } -unsigned BasicTTI::getMaximumUnrollFactor() const { +unsigned BasicTTI::getMaxVectorInterleaveFactor() const { return 1; } Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -104,7 +104,7 @@ return 64; } - unsigned getMaximumUnrollFactor() const override; + unsigned getMaxVectorInterleaveFactor() const override; unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const override; @@ -516,7 +516,7 @@ return Cost; } -unsigned AArch64TTI::getMaximumUnrollFactor() const { +unsigned AArch64TTI::getMaxVectorInterleaveFactor() const { if (ST->isCortexA57() || ST->isCyclone()) return 4; return 2; Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -104,7 +104,7 @@ return 32; } - unsigned getMaximumUnrollFactor() const override { + unsigned getMaxVectorInterleaveFactor() const override { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) return 2; Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -91,7 +91,7 @@ virtual unsigned getNumberOfRegisters(bool Vector) const override; virtual unsigned getRegisterBitWidth(bool Vector) const override; - virtual unsigned getMaximumUnrollFactor() const override; + virtual unsigned 
getMaxVectorInterleaveFactor() const override; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind, OperandValueProperties, @@ -298,7 +298,7 @@ } -unsigned PPCTTI::getMaximumUnrollFactor() const { +unsigned PPCTTI::getMaxVectorInterleaveFactor() const { unsigned Directive = ST->getDarwinDirective(); // The 440 has no SIMD support, but floating-point instructions // have a 5-cycle latency, so unroll by 5x for latency hiding. Index: lib/Target/R600/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -81,7 +81,7 @@ unsigned getNumberOfRegisters(bool Vector) const override; unsigned getRegisterBitWidth(bool Vector) const override; - unsigned getMaximumUnrollFactor() const override; + unsigned getMaxVectorInterleaveFactor() const override; /// @} }; @@ -153,7 +153,7 @@ return 32; } -unsigned AMDGPUTTI::getMaximumUnrollFactor() const { +unsigned AMDGPUTTI::getMaxVectorInterleaveFactor() const { // Semi-arbitrary large amount. 
return 64; } Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -82,7 +82,7 @@ unsigned getNumberOfRegisters(bool Vector) const override; unsigned getRegisterBitWidth(bool Vector) const override; - unsigned getMaximumUnrollFactor() const override; + unsigned getMaxVectorInterleaveFactor() const override; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind, OperandValueProperties, OperandValueProperties) const override; @@ -167,7 +167,7 @@ } -unsigned X86TTI::getMaximumUnrollFactor() const { +unsigned X86TTI::getMaxVectorInterleaveFactor() const { if (ST->isAtom()) return 1; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -157,18 +157,18 @@ "force-target-num-vector-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of vector registers.")); -/// Maximum vectorization unroll count. -static const unsigned MaxUnrollFactor = 16; +/// Maximum vectorization interleave count. 
+static const unsigned MaxInterleaveFactor = 16; static cl::opt<unsigned> ForceTargetMaxScalarUnrollFactor( "force-target-max-scalar-unroll", cl::init(0), cl::Hidden, - cl::desc("A flag that overrides the target's max unroll factor for scalar " - "loops.")); + cl::desc("A flag that overrides the target's max unroll/interleave factor " + "for scalar loops.")); static cl::opt<unsigned> ForceTargetMaxVectorUnrollFactor( "force-target-max-vector-unroll", cl::init(0), cl::Hidden, - cl::desc("A flag that overrides the target's max unroll factor for " - "vectorized loops.")); + cl::desc("A flag that overrides the target's max unroll/interleave factor " + "for vectorized loops.")); static cl::opt<unsigned> ForceTargetInstructionCost( "force-target-instruction-cost", cl::init(0), cl::Hidden, @@ -983,17 +983,17 @@ FK_Enabled = 1, ///< Forcing enabled. }; - LoopVectorizeHints(const Loop *L, bool DisableUnrolling) + LoopVectorizeHints(const Loop *L, bool DisableInterleaving) : Width(VectorizationFactor), - Unroll(DisableUnrolling), + Interleave(DisableInterleaving), Force(FK_Undefined), LoopID(L->getLoopID()) { getHints(L); - // force-vector-unroll overrides DisableUnrolling. + // force-vector-unroll overrides DisableInterleaving. 
if (VectorizationUnroll.getNumOccurrences() > 0) - Unroll = VectorizationUnroll; + Interleave = VectorizationUnroll; - DEBUG(if (DisableUnrolling && Unroll == 1) dbgs() + DEBUG(if (DisableInterleaving && Interleave == 1) dbgs() << "LV: Unrolling disabled by the pass manager\n"); } @@ -1046,8 +1046,8 @@ R << " (Force=true"; if (Width != 0) R << ", Vector Width=" << Width; - if (Unroll != 0) - R << ", Interleave Count=" << Unroll; + if (Interleave != 0) + R << ", Interleave Count=" << Interleave; R << ")"; } } @@ -1056,7 +1056,7 @@ } unsigned getWidth() const { return Width; } - unsigned getUnroll() const { return Unroll; } + unsigned getInterleave() const { return Interleave; } enum ForceKind getForce() const { return Force; } MDNode *getLoopID() const { return LoopID; } @@ -1120,10 +1120,10 @@ else DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n"); } else if (Hint == "interleave.count") { - if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor) - Unroll = Val; + if (isPowerOf2_32(Val) && Val <= MaxInterleaveFactor) + Interleave = Val; else - DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); + DEBUG(dbgs() << "LV: ignoring invalid interleave hint metadata\n"); } else { DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); } @@ -1131,8 +1131,8 @@ /// Vectorization width. unsigned Width; - /// Vectorization unroll factor. - unsigned Unroll; + /// Vectorization interleave factor. + unsigned Interleave; /// Vectorization forced enum ForceKind Force; @@ -1149,7 +1149,7 @@ emitLoopVectorizeWarning( F->getContext(), *F, L->getStartLoc(), "failed explicitly specified loop vectorization"); - else if (LH.getUnroll() != 1) + else if (LH.getInterleave() != 1) emitLoopInterleaveWarning( F->getContext(), *F, L->getStartLoc(), "failed explicitly specified loop interleaving"); @@ -1255,7 +1255,7 @@ : (Hints.getForce() == LoopVectorizeHints::FK_Enabled ? 
"enabled" : "?")) << " width=" << Hints.getWidth() - << " unroll=" << Hints.getUnroll() << "\n"); + << " interleave=" << Hints.getInterleave() << "\n"); // Function containing loop Function *F = L->getHeader()->getParent(); @@ -1282,7 +1282,7 @@ return false; } - if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) { + if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) { DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); emitOptimizationRemarkAnalysis( F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), @@ -5448,7 +5448,7 @@ // to the increased register pressure. // Use the user preference, unless 'auto' is selected. - int UserUF = Hints->getUnroll(); + int UserUF = Hints->getInterleave(); if (UserUF != 0) return UserUF; @@ -5500,16 +5500,16 @@ UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / std::max(1U, (R.MaxLocalUsers - 1))); - // Clamp the unroll factor ranges to reasonable factors. - unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor(); + // Clamp the interleave factor ranges to reasonable factors. + unsigned MaxInterleaveSize = TTI.getMaxVectorInterleaveFactor(); // Check if the user has overridden the unroll max. if (VF == 1) { if (ForceTargetMaxScalarUnrollFactor.getNumOccurrences() > 0) - MaxUnrollSize = ForceTargetMaxScalarUnrollFactor; + MaxInterleaveSize = ForceTargetMaxScalarUnrollFactor; } else { if (ForceTargetMaxVectorUnrollFactor.getNumOccurrences() > 0) - MaxUnrollSize = ForceTargetMaxVectorUnrollFactor; + MaxInterleaveSize = ForceTargetMaxVectorUnrollFactor; } // If we did not calculate the cost for VF (because the user selected the VF) @@ -5517,10 +5517,10 @@ if (LoopCost == 0) LoopCost = expectedCost(VF); - // Clamp the calculated UF to be between the 1 and the max unroll factor + // Clamp the calculated UF to be between the 1 and the max interleave factor // that the target allows. 
- if (UF > MaxUnrollSize) - UF = MaxUnrollSize; + if (UF > MaxInterleaveSize) + UF = MaxInterleaveSize; else if (UF < 1) UF = 1;