diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -506,10 +506,6 @@
   /// True if compiling for 16-bit, false for 32-bit or 64-bit.
   bool In16BitMode = false;
 
-  /// Contains the Overhead of gather\scatter instructions
-  int GatherOverhead = 1024;
-  int ScatterOverhead = 1024;
-
   X86SelectionDAGInfo TSInfo;
   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
   // X86TargetLowering needs.
@@ -678,8 +674,6 @@
   bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
   bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
   bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
-  int getGatherOverhead() const { return GatherOverhead; }
-  int getScatterOverhead() const { return ScatterOverhead; }
   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
   bool useLeaForSP() const { return UseLeaForSP; }
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -262,17 +262,6 @@
       isTargetKFreeBSD() || In64BitMode)
     stackAlignment = Align(16);
 
-  // Some CPUs have more overhead for gather. The specified overhead is relative
-  // to the Load operation. "2" is the number provided by Intel architects. This
-  // parameter is used for cost estimation of Gather Op and comparison with
-  // other alternatives.
-  // TODO: Remove the explicit hasAVX512()?, That would mean we would only
-  // enable gather with a -march.
-  if (hasAVX512() || (hasAVX2() && hasFastGather()))
-    GatherOverhead = 2;
-  if (hasAVX512())
-    ScatterOverhead = 2;
-
   // Consume the vector width attribute or apply any target specific limit.
   if (PreferVectorWidthOverride)
     PreferVectorWidth = PreferVectorWidthOverride;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -244,6 +244,9 @@
   int getGSVectorCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                       Align Alignment, unsigned AddressSpace);
 
+  int getGatherOverhead() const;
+  int getScatterOverhead() const;
+
   /// @}
 };
 
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3848,6 +3848,26 @@
   return CostKind == TTI::TCK_RecipThroughput ? 0 : 1;
 }
 
+int X86TTIImpl::getGatherOverhead() const {
+  // Some CPUs have more overhead for gather. The specified overhead is relative
+  // to the Load operation. "2" is the number provided by Intel architects. This
+  // parameter is used for cost estimation of Gather Op and comparison with
+  // other alternatives.
+  // TODO: Remove the explicit hasAVX512()? That would mean we would only
+  // enable gather with a -march.
+  if (ST->hasAVX512() || (ST->hasAVX2() && ST->hasFastGather()))
+    return 2;
+
+  return 1024;
+}
+
+int X86TTIImpl::getScatterOverhead() const {
+  if (ST->hasAVX512())
+    return 2;
+
+  return 1024;
+}
+
 // Return an average cost of Gather / Scatter instruction, maybe improved later
 int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, const Value *Ptr,
                                 Align Alignment, unsigned AddressSpace) {
@@ -3906,8 +3926,8 @@
   // The gather / scatter cost is given by Intel architects. It is a rough
   // number since we are looking at one instruction in a time.
   const int GSOverhead = (Opcode == Instruction::Load)
-                             ? ST->getGatherOverhead()
-                             : ST->getScatterOverhead();
+                             ? getGatherOverhead()
+                             : getScatterOverhead();
   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
                                            MaybeAlign(Alignment), AddressSpace,
                                            TTI::TCK_RecipThroughput);
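
For reference, a minimal standalone sketch (not part of the patch) of the cost logic the moved hooks implement. FakeSubtarget, VF, and ScalarLoadCost below are hypothetical stand-ins: in tree, X86TTIImpl queries the real X86Subtarget through ST, and getMemoryOpCost supplies the per-element load cost.

// Illustration only: mirrors X86TTIImpl::getGatherOverhead() and the tail of
// getGSVectorCost(), with a hypothetical FakeSubtarget in place of X86Subtarget.
#include <iostream>

struct FakeSubtarget {   // hypothetical stand-in for the real X86Subtarget
  bool HasAVX512 = false;
  bool HasAVX2 = true;
  bool HasFastGather = true;
};

// Cheap (2, the number from Intel architects) when hardware gather is usable;
// effectively infinite (1024) otherwise, steering the vectorizer away from it.
int getGatherOverhead(const FakeSubtarget &ST) {
  if (ST.HasAVX512 || (ST.HasAVX2 && ST.HasFastGather))
    return 2;
  return 1024;
}

int main() {
  FakeSubtarget ST;
  int VF = 8;             // vectorization factor: number of gathered elements
  int ScalarLoadCost = 1; // stands in for getMemoryOpCost(...) per element
  // Same shape as the return in getGSVectorCost: overhead + VF * element cost.
  std::cout << getGatherOverhead(ST) + VF * ScalarLoadCost << "\n"; // prints 10
  return 0;
}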