diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1096,7 +1096,7 @@
   /// \return The maximum interleave factor that any transform should try to
   /// perform for this target. This number depends on the level of parallelism
   /// and the number of execution units in the CPU.
-  unsigned getMaxInterleaveFactor(unsigned VF) const;
+  unsigned getMaxInterleaveFactor(ElementCount VF) const;
 
   /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
   static OperandValueInfo getOperandInfo(const Value *V);
@@ -1796,7 +1796,7 @@
   /// \return if target want to issue a prefetch in address space \p AS.
   virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
 
-  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
+  virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
   virtual InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
       OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
@@ -2356,7 +2356,7 @@
     return Impl.shouldPrefetchAddressSpace(AS);
   }
 
-  unsigned getMaxInterleaveFactor(unsigned VF) override {
+  unsigned getMaxInterleaveFactor(ElementCount VF) override {
     return Impl.getMaxInterleaveFactor(VF);
   }
   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -491,7 +491,7 @@
   bool enableWritePrefetching() const { return false; }
   bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
 
-  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
+  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
 
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -844,7 +844,7 @@
     }
   }
 
-  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
+  unsigned getMaxInterleaveFactor(ElementCount VF) { return 1; }
 
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -735,7 +735,7 @@
   return TTIImpl->shouldPrefetchAddressSpace(AS);
 }
 
-unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+unsigned TargetTransformInfo::getMaxInterleaveFactor(ElementCount VF) const {
   return TTIImpl->getMaxInterleaveFactor(VF);
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -144,7 +144,7 @@
     return VF.getKnownMinValue() * ST->getVScaleForTuning();
   }
 
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
 
   bool prefersVectorizedAddressing() const;
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2673,7 +2673,7 @@
   return Cost;
 }
 
-unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   return ST->getMaxInterleaveFactor();
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -143,7 +143,7 @@
       unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
       unsigned SrcAlign, unsigned DestAlign,
       std::optional<uint32_t> AtomicCpySize) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -474,10 +474,10 @@
   }
 }
 
-unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned GCNTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   // Disable unrolling if the loop is not vectorized.
   // TODO: Enable this again.
-  if (VF == 1)
+  if (VF.isScalar())
     return 1;
 
   return 8;
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -57,7 +57,7 @@
                                    unsigned AddrSpace) const;
   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                     unsigned AddrSpace) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);
   using BaseT::getVectorInstrCost;
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -82,10 +82,10 @@
   return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
 }
 
-unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   // Disable unrolling if the loop is not vectorized.
   // TODO: Enable this again.
-  if (VF == 1)
+  if (VF.isScalar())
     return 1;
 
   return 8;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -178,7 +178,7 @@
     llvm_unreachable("Unsupported register kind");
   }
 
-  unsigned getMaxInterleaveFactor(unsigned VF) {
+  unsigned getMaxInterleaveFactor(ElementCount VF) {
     return ST->getMaxInterleaveFactor();
   }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -82,7 +82,7 @@
   /// @{
 
   unsigned getNumberOfRegisters(bool vector) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
   unsigned getMinVectorRegisterBitWidth() const;
   ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -110,7 +110,7 @@
   return 32;
 }
 
-unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   return useHVX() ? 2 : 1;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -101,7 +101,7 @@
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
   unsigned getCacheLineSize() const override;
   unsigned getPrefetchDistance() const override;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
   InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1,
                                              Type *Ty2);
   InstructionCost getArithmeticInstrCost(
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -517,7 +517,7 @@
   return 300;
 }
 
-unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   unsigned Directive = ST->getCPUDirective();
   // The 440 has no SIMD support, but floating-point instructions
   // have a 5-cycle latency, so unroll by 5x for latency hiding.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -270,10 +270,10 @@
     }
   }
 
-  unsigned getMaxInterleaveFactor(unsigned VF) {
+  unsigned getMaxInterleaveFactor(ElementCount VF) {
     // If the loop will not be vectorized, don't interleave the loop.
     // Let regular unroll to unroll the loop.
-    return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
+    return VF.getKnownMinValue() == 1 ? 1 : ST->getMaxInterleaveFactor();
   }
 
   enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -127,7 +127,7 @@
   unsigned getNumberOfRegisters(unsigned ClassID) const;
   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
   unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
-  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getMaxInterleaveFactor(ElementCount VF);
   InstructionCost getArithmeticInstrCost(
       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -199,11 +199,11 @@
       .getFixedValue();
 }
 
-unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned X86TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
   // If the loop will not be vectorized, don't interleave the loop.
   // Let regular unroll to unroll the loop, which saves the overflow
   // check and memory check cost.
-  if (VF == 1)
+  if (VF.isScalar())
     return 1;
 
   if (ST->isAtom())
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5548,7 +5548,7 @@
 
   // We also consider epilogue vectorization unprofitable for targets that don't
   // consider interleaving beneficial (eg. MVE).
-  if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
+  if (TTI.getMaxInterleaveFactor(VF) <= 1)
     return false;
   // FIXME: We should consider changing the threshold for scalable
   // vectors to take VScaleForTuning into account.
@@ -5803,8 +5803,7 @@
   }
 
   // Clamp the interleave ranges to reasonable counts.
-  unsigned MaxInterleaveCount =
-      TTI.getMaxInterleaveFactor(VF.getKnownMinValue());
+  unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
 
   // Check if the user has overridden the max.
   if (VF.isScalar()) {
@@ -10539,7 +10538,7 @@
   // vector registers, loop vectorization may still enable scalar
   // interleaving.
   if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
-      TTI->getMaxInterleaveFactor(1) < 2)
+      TTI->getMaxInterleaveFactor(ElementCount::getFixed(1)) < 2)
     return LoopVectorizeResult(false, false);
 
   bool Changed = false, CFGChanged = false;
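
For illustration, a minimal standalone sketch of how a target hook could consume the ElementCount-typed VF introduced by this patch. exampleMaxInterleaveFactor and its returned factors are hypothetical (not taken from any in-tree target); only llvm/Support/TypeSize.h, which defines llvm::ElementCount, is assumed to be on the include path.

// Hypothetical sketch mirroring the updated getMaxInterleaveFactor signature.
// It uses the ElementCount queries seen in the patch (isScalar, getFixed) plus
// isScalable to show what the richer parameter type makes possible.
#include "llvm/Support/TypeSize.h"

#include <cstdio>

using llvm::ElementCount;

// Stand-in for a target's getMaxInterleaveFactor override; the factors 2 and 4
// are illustrative only.
static unsigned exampleMaxInterleaveFactor(ElementCount VF) {
  // If the loop will not be vectorized (VF == 1), do not interleave it.
  if (VF.isScalar())
    return 1;
  // With ElementCount, a target can also key the factor off scalability.
  return VF.isScalable() ? 2 : 4;
}

int main() {
  printf("%u\n", exampleMaxInterleaveFactor(ElementCount::getFixed(1)));    // 1
  printf("%u\n", exampleMaxInterleaveFactor(ElementCount::getFixed(4)));    // 4
  printf("%u\n", exampleMaxInterleaveFactor(ElementCount::getScalable(4))); // 2
}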