Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -791,9 +791,12 @@ /// Use -1 to indicate that there is no information on the index value. int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; - /// \return The cost of Load and Store instructions. + /// \return The cost of Load and Store instructions. \p Src is the type of + /// the load / store which may be a scalar or vector type. If \p Scalarized + /// is true this is a scalarized memory access of a vector element. int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr) const; + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false) const; /// \return The cost of masked Load and Store instructions. int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, @@ -1117,7 +1120,8 @@ virtual int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) = 0; + unsigned AddressSpace, const Instruction *I, + bool Scalarized) = 0; virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) = 0; @@ -1452,8 +1456,10 @@ return Impl.getVectorInstrCost(Opcode, Val, Index); } int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) override { - return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + unsigned AddressSpace, const Instruction *I, + bool Scalarized) override { + return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I, + Scalarized); } int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) override { Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -432,7 +432,8 @@ } unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) { + unsigned AddressSpace, const Instruction *I, + bool Scalarized) { return 1; } Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -750,7 +750,8 @@ } unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr) { + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false) { assert(!Src->isVoidTy() && "Invalid type"); std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src); Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -444,10 +444,14 @@ int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, - const Instruction *I) const { + const Instruction *I, + bool Scalarized) const { assert ((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); - int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + assert (!(Scalarized && Src->isVectorTy()) && + "Scalarized instruction should be queried with scalar type"); + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I, + Scalarized); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } Index: lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.h +++ lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -132,7 +132,8 @@ const Instruction *I = nullptr); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false); int getCostOfKeepingLiveOverCall(ArrayRef Tys); Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -619,7 +619,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Alignment, unsigned AddressSpace, - const Instruction *I) { + const Instruction *I, bool Scalarized) { auto LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store && Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -165,7 +165,8 @@ ArrayRef Args = ArrayRef()); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -526,7 +526,8 @@ } int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) { + unsigned AddressSpace, const Instruction *I, + bool Scalarized) { std::pair LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && Index: lib/Target/Hexagon/HexagonTargetTransformInfo.h =================================================================== --- lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -114,7 +114,8 @@ unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S); unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false); unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace); unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Index: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp =================================================================== --- lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -151,7 +151,8 @@ } unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, unsigned AddressSpace, const Instruction *I) { + unsigned Alignment, unsigned AddressSpace, const Instruction *I, + bool Scalarized) { assert(Opcode == Instruction::Load || Opcode == Instruction::Store); if (Opcode == Instruction::Store) return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); Index: lib/Target/PowerPC/PPCTargetTransformInfo.h =================================================================== --- lib/Target/PowerPC/PPCTargetTransformInfo.h +++ lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -85,7 +85,8 @@ const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Index: lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -400,7 +400,8 @@ } int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) { + unsigned AddressSpace, const Instruction *I, + bool Scalarized) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -86,7 +86,8 @@ const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -822,11 +822,11 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, - const Instruction *I) { + const Instruction *I, bool Scalarized) { assert(!Src->isVoidTy() && "Invalid type"); if (!Src->isVectorTy() && Opcode == Instruction::Load && - I != nullptr && I->hasOneUse()) { + I != nullptr && I->hasOneUse() && !Scalarized) { const Instruction *UserI = cast(*I->user_begin()); unsigned Bits = Src->getScalarSizeInBits(); bool FoldsLoad = false; Index: lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- lib/Target/X86/X86TargetTransformInfo.h +++ lib/Target/X86/X86TargetTransformInfo.h @@ -76,7 +76,8 @@ const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, const Instruction *I = nullptr, + bool Scalarized = false); int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace); int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -1906,7 +1906,8 @@ } int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace, const Instruction *I) { + unsigned AddressSpace, const Instruction *I, + bool Scalarized) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { unsigned NumElem = VTy->getVectorNumElements(); Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5254,7 +5254,7 @@ Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, - AS, I); + AS, I, true/*Scalarized*/); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. Index: test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll @@ -0,0 +1,26 @@ +; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \ +; RUN: -force-vector-width=4 -debug-only=loop-vectorize \ +; RUN: -enable-interleaved-mem-accesses=false -disable-output < %s 2>&1 \ +; RUN: | FileCheck %s +; REQUIRES: asserts +; + +define i32 @fun(i64* %data, i64 %n, i64 %s, i32* %Src) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %acc = phi i32 [ 0, %entry ], [ %acc_next, %for.body ] + %gep = getelementptr inbounds i32, i32* %Src, i64 %iv + %ld = load i32, i32* %gep + %acc_next = add i32 %acc, %ld + %iv.next = add nuw nsw i64 %iv, 2 + %cmp110.us = icmp slt i64 %iv.next, %n + br i1 %cmp110.us, label %for.body, label %for.end + +for.end: + ret i32 %acc_next + +; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %ld = load i32, i32* %gep +}