diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -773,9 +773,7 @@
   /// illegal as the original, thus leading to an infinite legalisation loop.
   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
   /// vector types this override can be removed.
-  bool mergeStoresAfterLegalization(EVT VT) const override {
-    return !useSVEForFixedLengthVectors();
-  }
+  bool mergeStoresAfterLegalization(EVT VT) const override;
 
 private:
   /// Keep a pointer to the AArch64Subtarget around so that we can
@@ -1008,7 +1006,6 @@
   bool shouldLocalize(const MachineInstr &MI,
                       const TargetTransformInfo *TTI) const override;
 
-  bool useSVEForFixedLengthVectors() const;
   // Normally SVE is only used for byte size vectors that do not fit within a
   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
   // used for 64bit and 128bit vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -269,7 +269,7 @@
     addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
   }
 
-  if (useSVEForFixedLengthVectors()) {
+  if (Subtarget->useSVEForFixedLengthVectors()) {
     for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
       if (useSVEForFixedLengthVectorVT(VT))
         addRegisterClass(VT, &AArch64::ZPRRegClass);
@@ -1085,7 +1085,7 @@
 
   // NOTE: Currently this has to happen after computeRegisterProperties rather
   // than the preferred option of combining it with the addRegisterClass call.
-  if (useSVEForFixedLengthVectors()) {
+  if (Subtarget->useSVEForFixedLengthVectors()) {
     for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
       if (useSVEForFixedLengthVectorVT(VT))
         addTypeForFixedLengthSVE(VT);
@@ -4140,14 +4140,13 @@
   }
 }
 
-bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
-  // Prefer NEON unless larger SVE registers are available.
-  return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
+bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
+  return !Subtarget->useSVEForFixedLengthVectors();
 }
 
 bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
     EVT VT, bool OverrideNEON) const {
-  if (!useSVEForFixedLengthVectors())
+  if (!Subtarget->useSVEForFixedLengthVectors())
     return false;
 
   if (!VT.isFixedLengthVector())
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -555,6 +555,7 @@
   // implied by the architecture.
   unsigned getMaxSVEVectorSizeInBits() const;
   unsigned getMinSVEVectorSizeInBits() const;
+  bool useSVEForFixedLengthVectors() const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -368,3 +368,8 @@
     return (SVEVectorBitsMin / 128) * 128;
   return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
 }
+
+bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
+  // Prefer NEON unless larger SVE registers are available.
+  return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+}
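The predicate body is unchanged; it only moves from AArch64TargetLowering to AArch64Subtarget so that callers outside ISel lowering (such as the TTI changes below) can share it. A minimal sketch of the new call pattern; `preferSVEForFixedVectors` is a hypothetical illustrative helper, not part of the patch:

    #include "AArch64Subtarget.h"
    using namespace llvm;

    // Any AArch64 component holding a subtarget pointer can now ask directly,
    // without routing the query through AArch64TargetLowering.
    static bool preferSVEForFixedVectors(const AArch64Subtarget *ST) {
      // True only when SVE is available and the guaranteed register width
      // (-aarch64-sve-vector-bits-min) is at least 256 bits.
      return ST->useSVEForFixedLengthVectors();
    }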
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -147,6 +147,7 @@
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
 
+  bool useNeonVector(const Type *Ty) const;
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -751,6 +751,10 @@
   return Options;
 }
 
+bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
+  return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
+}
+
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
                                     MaybeAlign Alignment, unsigned AddressSpace,
                                     TTI::TargetCostKind CostKind,
@@ -778,7 +782,7 @@
     return LT.first * 2 * AmortizationCost;
   }
 
-  if (Ty->isVectorTy() &&
+  if (useNeonVector(Ty) &&
       cast<FixedVectorType>(Ty)->getElementType()->isIntegerTy(8)) {
     unsigned ProfitableNumElements;
     if (Opcode == Instruction::Store)
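useNeonVector() exists because the byte-vector heuristic in getMemoryOpCost is NEON-specific: it would mis-cost fixed-length vectors that are actually lowered with SVE, and its cast<FixedVectorType> would assert on a scalable type. A standalone sketch of the gate (simplified hypothetical helper, not the real getMemoryOpCost):

    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // Mirrors useNeonVector(): NEON-only heuristics fire solely for
    // fixed-length vectors that will be lowered with NEON, so the
    // cast<FixedVectorType> in the caller can never see a scalable type.
    static bool reachesNeonHeuristic(const Type *Ty, bool UseSVEForFixedVectors) {
      return isa<FixedVectorType>(Ty) && !UseSVEForFixedVectors;
    }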
diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -0,0 +1,88 @@
+; Check memory cost model action for fixed vector SVE and Neon.
+; SVE minimum vector sizes below 256 bits fall back to the Neon cost model;
+; CHECK-NEON therefore reports the same costs as CHECK-SVE-128.
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s --check-prefix=CHECK-SVE-128
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s --check-prefix=CHECK-SVE-256
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefix=CHECK-SVE-512
+
+define <16 x i8> @load16(<16 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load16':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <16 x i8>, <16 x i8>* %ptr
+  ret <16 x i8> %out
+}
+
+define void @store16(<16 x i8>* %ptr, <16 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store16':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  store <16 x i8> %val, <16 x i8>* %ptr
+  ret void
+}
+
+define <8 x i8> @load8(<8 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load8':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <8 x i8>, <8 x i8>* %ptr
+  ret <8 x i8> %out
+}
+
+define void @store8(<8 x i8>* %ptr, <8 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store8':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  store <8 x i8> %val, <8 x i8>* %ptr
+  ret void
+}
+
+define <4 x i8> @load4(<4 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load4':
+; CHECK-NEON: Cost Model: Found an estimated cost of 64 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 64 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <4 x i8>, <4 x i8>* %ptr
+  ret <4 x i8> %out
+}
+
+define void @store4(<4 x i8>* %ptr, <4 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store4':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  store <4 x i8> %val, <4 x i8>* %ptr
+  ret void
+}
+
+define <16 x i16> @load_256(<16 x i16>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load_256':
+; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <16 x i16>, <16 x i16>* %ptr
+  ret <16 x i16> %out
+}
+
+define <8 x i64> @load_512(<8 x i64>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load_512':
+; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+  %out = load <8 x i64>, <8 x i64>* %ptr
+  ret <8 x i64> %out
+}
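The expected costs follow from how many registers a legal vector occupies: <8 x i64> is 512 bits, so it needs four 128-bit NEON registers (cost 4), two 256-bit SVE registers (cost 2), or one 512-bit SVE register (cost 1). The <4 x i8> NEON load is the outlier because it is scalarized. A back-of-envelope sketch of the register-count rule only; this is an illustrative model, not the TTI implementation:

    #include <algorithm>
    #include <cstdio>

    // Illustrative only: a legal vector memory op costs roughly one unit per
    // register it occupies. Does not model the scalarized <4 x i8> NEON load.
    static unsigned expectedCost(unsigned VecBits, unsigned RegBits) {
      return std::max(1u, VecBits / RegBits);
    }

    int main() {
      std::printf("%u\n", expectedCost(512, 128)); // 4: load_512 under CHECK-NEON
      std::printf("%u\n", expectedCost(512, 256)); // 2: load_512 under CHECK-SVE-256
      std::printf("%u\n", expectedCost(256, 128)); // 2: load_256 under CHECK-NEON
      return 0;
    }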
diff --git a/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
@@ -0,0 +1,51 @@
+; Check that the memory cost model does not break when using scalable vectors.
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 8 x i8> @load-sve-8(<vscale x 8 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-8':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  %retval = load <vscale x 8 x i8>, <vscale x 8 x i8>* %ptr
+  ret <vscale x 8 x i8> %retval
+}
+
+define void @store-sve-8(<vscale x 8 x i8>* %ptr, <vscale x 8 x i8> %val) {
+; CHECK-LABEL: 'store-sve-8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  store <vscale x 8 x i8> %val, <vscale x 8 x i8>* %ptr
+  ret void
+}
+
+define <vscale x 16 x i8> @load-sve-16(<vscale x 16 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-16':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  %retval = load <vscale x 16 x i8>, <vscale x 16 x i8>* %ptr
+  ret <vscale x 16 x i8> %retval
+}
+
+define void @store-sve-16(<vscale x 16 x i8>* %ptr, <vscale x 16 x i8> %val) {
+; CHECK-LABEL: 'store-sve-16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  store <vscale x 16 x i8> %val, <vscale x 16 x i8>* %ptr
+  ret void
+}
+
+define <vscale x 32 x i8> @load-sve-32(<vscale x 32 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-32':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  %retval = load <vscale x 32 x i8>, <vscale x 32 x i8>* %ptr
+  ret <vscale x 32 x i8> %retval
+}
+
+define void @store-sve-32(<vscale x 32 x i8>* %ptr, <vscale x 32 x i8> %val) {
+; CHECK-LABEL: 'store-sve-32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+  store <vscale x 32 x i8> %val, <vscale x 32 x i8>* %ptr
+  ret void
+}
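These scalable cases exercise exactly the path that useNeonVector() now protects. A minimal standalone illustration of why the guard matters, assuming the LLVM C++ API used elsewhere in this patch:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // <vscale x 16 x i8>: a scalable vector is not a FixedVectorType, so
      // the pre-patch cast<FixedVectorType>(Ty) path would have asserted.
      VectorType *VTy =
          VectorType::get(Type::getInt8Ty(Ctx), 16, /*Scalable=*/true);
      return isa<FixedVectorType>(VTy) ? 1 : 0; // returns 0
    }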