Add the TTI hook supportsVectorElementLoadStore().

The hook defaults to false and is enabled for SystemZ. When it returns true,
the loop vectorizer's scalarization cost no longer includes the insert
overhead for the result of a load or the operand extract overhead of a store.

NOTE(review): this patch text was recovered from a copy in which line breaks
and all angle-bracketed template arguments had been lost. The template
arguments (ArrayRef<const Value *>, SmallVector<const Value *, 4>,
isa<LoadInst>, isa<StoreInst>, dyn_cast<CallInst>) and the whitespace of the
context lines are reconstructions - confirm them against the base tree before
applying.

Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -434,6 +434,11 @@
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF, Type *VecTy = nullptr) const;
 
+  /// Return true if the target reports support for vector element load/store.
+  /// The loop vectorizer then skips the insert overhead of scalarized loads
+  /// and the operand extract overhead of scalarized stores.
+  bool supportsVectorElementLoadStore() const;
+
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 
@@ -775,6 +780,7 @@
   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                     unsigned VF, Type *VecTy) = 0;
+  virtual bool supportsVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -980,6 +986,10 @@
     return Impl.getOperandsScalarizationOverhead(Args, VF, VecTy);
   }
 
+  bool supportsVectorElementLoadStore() override {
+    return Impl.supportsVectorElementLoadStore();
+  }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -262,6 +262,8 @@
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF, Type *VecTy) { return 0; }
 
+  bool supportsVectorElementLoadStore() { return false; }
+
   bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
 
   bool enableInterleavedAccessVectorization() { return false; }
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -197,6 +197,10 @@
   return TTIImpl->getOperandsScalarizationOverhead(Args, VF, VecTy);
 }
 
+bool TargetTransformInfo::supportsVectorElementLoadStore() const {
+  return TTIImpl->supportsVectorElementLoadStore();
+}
+
 bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
   return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
 }
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -55,6 +55,8 @@
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
 
+  bool supportsVectorElementLoadStore() { return true; }
+
   bool isFPVectorizationPotentiallyUnsafe() { return false; }
 
   int getArithmeticInstrCost(
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3723,13 +3723,14 @@
 
   unsigned Cost = 0;
   Type *RetTy = ToVectorTy(I->getType(), VF);
-  if (!RetTy->isVoidTy())
+  if (!RetTy->isVoidTy() &&
+      (!isa<LoadInst>(I) || !TTI.supportsVectorElementLoadStore()))
     Cost += TTI.getScalarizationOverhead(RetTy, true, false);
 
   if (CallInst *CI = dyn_cast<CallInst>(I)) {
     SmallVector<const Value *, 4> Operands(CI->arg_operands());
     Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  } else {
+  } else if (!isa<StoreInst>(I) || !TTI.supportsVectorElementLoadStore()) {
     SmallVector<const Value *, 4> Operands(I->operand_values());
     Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
   }