Diff 358621

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,383 Lines • ▼ Show 20 Lines	AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment, unsigned AddressSpace,		Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind) {		TTI::TargetCostKind CostKind) {
if (!isa<ScalableVectorType>(Src))		if (!isa<ScalableVectorType>(Src))
return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,		return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);		CostKind);
auto LT = TLI->getTypeLegalizationCost(DL, Src);		auto LT = TLI->getTypeLegalizationCost(DL, Src);
if (!LT.first.isValid())		if (!LT.first.isValid())
return InstructionCost::getInvalid();		return InstructionCost::getInvalid();

		// The code-generator is currently not able to handle scalable vectors
		// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
		// it. This change will be removed when code-generation for these types is
		// sufficiently reliable.
		if (cast<VectorType>(Src)->getElementCount() == ElementCount::getScalable(1))
		return InstructionCost::getInvalid();

return LT.first * 2;		return LT.first * 2;
}		}

InstructionCost AArch64TTIImpl::getGatherScatterOpCost(		InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type DataTy, const Value Ptr, bool VariableMask,		unsigned Opcode, Type DataTy, const Value Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {		Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {

if (!isa<ScalableVectorType>(DataTy))		if (!isa<ScalableVectorType>(DataTy))
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,		return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);		Alignment, CostKind, I);
auto *VT = cast<VectorType>(DataTy);		auto *VT = cast<VectorType>(DataTy);
auto LT = TLI->getTypeLegalizationCost(DL, DataTy);		auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
if (!LT.first.isValid())		if (!LT.first.isValid())
return InstructionCost::getInvalid();		return InstructionCost::getInvalid();

		// The code-generator is currently not able to handle scalable vectors
		// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
		// it. This change will be removed when code-generation for these types is
		// sufficiently reliable.
		if (cast<VectorType>(DataTy)->getElementCount() ==
		ElementCount::getScalable(1))
		return InstructionCost::getInvalid();

ElementCount LegalVF = LT.second.getVectorElementCount();		ElementCount LegalVF = LT.second.getVectorElementCount();
Optional<unsigned> MaxNumVScale = getMaxVScale();		Optional<unsigned> MaxNumVScale = getMaxVScale();
assert(MaxNumVScale && "Expected valid max vscale value");		assert(MaxNumVScale && "Expected valid max vscale value");

InstructionCost MemOpCost =		InstructionCost MemOpCost =
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);		getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
unsigned MaxNumElementsPerGather =		unsigned MaxNumElementsPerGather =
MaxNumVScale.getValue() * LegalVF.getKnownMinValue();		MaxNumVScale.getValue() * LegalVF.getKnownMinValue();
Show All 14 Lines	InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
if (VT == MVT::Other)		if (VT == MVT::Other)
return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,		return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
CostKind);		CostKind);

auto LT = TLI->getTypeLegalizationCost(DL, Ty);		auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (!LT.first.isValid())		if (!LT.first.isValid())
return InstructionCost::getInvalid();		return InstructionCost::getInvalid();

		// The code-generator is currently not able to handle scalable vectors
		// of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
		// it. This change will be removed when code-generation for these types is
		// sufficiently reliable.
		if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
		if (VTy->getElementCount() == ElementCount::getScalable(1))
		return InstructionCost::getInvalid();

// TODO: consider latency as well for TCK_SizeAndLatency.		// TODO: consider latency as well for TCK_SizeAndLatency.
if (CostKind == TTI::TCK_CodeSize \|\| CostKind == TTI::TCK_SizeAndLatency)		if (CostKind == TTI::TCK_CodeSize \|\| CostKind == TTI::TCK_SizeAndLatency)
return LT.first;		return LT.first;

if (CostKind != TTI::TCK_RecipThroughput)		if (CostKind != TTI::TCK_RecipThroughput)
return 1;		return 1;

if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&		if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
▲ Show 20 Lines • Show All 594 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll

Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
		; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>* undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)		; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>* undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
entry:		entry:
; Legal scalable integer types		; Legal scalable integer types
%nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)		%nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
%nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)		%nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
Show All 10 Lines	entry:
%nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)		%nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
%nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)		%nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
%nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)		%nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
%nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)		%nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
%nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)		%nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
%nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)		%nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)

; A couple of examples of illegal scalable types		; A couple of examples of illegal scalable types
		%nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64> *undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
%nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)		%nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
%nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half> *undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)		%nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half> *undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)

ret void		ret void
}		}

declare <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>*, i32, <2 x i1>, <2 x i8>)		declare <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>*, i32, <2 x i1>, <2 x i8>)
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)		declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
Show All 21 Lines
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)		declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)		declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)		declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)		declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)		declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)		declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)		declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>*, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)		declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>*, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
		declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>*, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>)		declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)		declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)		declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>*, i32, <vscale x 32 x i1>, <vscale x 32 x half>)		declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>*, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>*, i32, <vscale x 2 x i1>, <vscale x 2 x float>)		declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>*, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)		declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)		declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

	; Check getIntrinsicInstrCost in BasicTTIImpl.h for masked gather			; Check getIntrinsicInstrCost in BasicTTIImpl.h for masked gather

	; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s

	define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {			define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
	; CHECK-LABEL: 'masked_gathers'			; CHECK-LABEL: 'masked_gathers'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128			; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128
				; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64
	%res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)			%res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
	%res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)			%res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
	%res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> undef, i32 0, <4 x i1> %v4i1mask, <4 x i32> zeroinitializer)			%res.v4i32 = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> undef, i32 0, <4 x i1> %v4i1mask, <4 x i32> zeroinitializer)
	%res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> undef, i32 0, <1 x i1> %v1i1mask, <1 x i128> zeroinitializer)			%res.v1i128 = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> undef, i32 0, <1 x i1> %v1i1mask, <1 x i128> zeroinitializer)
				%res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
	ret void			ret void
	}			}

	declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)			declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
	declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)			declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
	declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)			declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
	declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x i1>, <1 x i128>)			declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x i1>, <1 x i128>)
				declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)

llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll

	; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s

	define void @scalable_loads() {			define void @scalable_loads() {
	; CHECK-LABEL: 'scalable_loads'			; CHECK-LABEL: 'scalable_loads'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv8i8			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv8i8
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv16i8			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv16i8
	; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv32i8			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv32i8
				; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64
	%res.nxv8i8 = load <vscale x 8 x i8>, <vscale x 8 x i8>* undef			%res.nxv8i8 = load <vscale x 8 x i8>, <vscale x 8 x i8>* undef
	%res.nxv16i8 = load <vscale x 16 x i8>, <vscale x 16 x i8>* undef			%res.nxv16i8 = load <vscale x 16 x i8>, <vscale x 16 x i8>* undef
	%res.nxv32i8 = load <vscale x 32 x i8>, <vscale x 32 x i8>* undef			%res.nxv32i8 = load <vscale x 32 x i8>, <vscale x 32 x i8>* undef
				%res.nxv1i64 = load <vscale x 1 x i64>, <vscale x 1 x i64>* undef
	ret void			ret void
	}			}

	define void @scalable_stores() {			define void @scalable_stores() {
	; CHECK-LABEL: 'scalable_stores'			; CHECK-LABEL: 'scalable_stores'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 8 x i8>			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 8 x i8>
	; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 16 x i8>			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 16 x i8>
	; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 32 x i8>			; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 32 x i8>
				; CHECK-NEXT: Cost Model: Invalid cost for instruction: store <vscale x 1 x i64>
	store <vscale x 8 x i8> undef, <vscale x 8 x i8>* undef			store <vscale x 8 x i8> undef, <vscale x 8 x i8>* undef
	store <vscale x 16 x i8> undef, <vscale x 16 x i8>* undef			store <vscale x 16 x i8> undef, <vscale x 16 x i8>* undef
	store <vscale x 32 x i8> undef, <vscale x 32 x i8>* undef			store <vscale x 32 x i8> undef, <vscale x 32 x i8>* undef
				store <vscale x 1 x i64> undef, <vscale x 1 x i64>* undef
	ret void			ret void
	}			}

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

	; Check getIntrinsicInstrCost in BasicTTIImpl.h with for masked scatter			; Check getIntrinsicInstrCost in BasicTTIImpl.h with for masked scatter

	; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s

	define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {			define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
	; CHECK-LABEL: 'masked_scatters'			; CHECK-LABEL: 'masked_scatters'
	; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32			; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32
	; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128			; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128
				; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64
	call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)			call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
	call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)			call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
	call void @llvm.masked.scatter.v4i32(<4 x i32> undef, <4 x i32*> undef, i32 0, <4 x i1> %v4i1mask)			call void @llvm.masked.scatter.v4i32(<4 x i32> undef, <4 x i32*> undef, i32 0, <4 x i1> %v4i1mask)
	call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> undef, <1 x i128*> undef, i32 0, <1 x i1> %v1i1mask)			call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> undef, <1 x i128*> undef, i32 0, <1 x i1> %v1i1mask)
				call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
	ret void			ret void
	}			}

	declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)			declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
	declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)			declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)
	declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)			declare void @llvm.masked.scatter.v4i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
	declare void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128>, <1 x i128*>, i32, <1 x i1>)			declare void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128>, <1 x i128*>, i32, <1 x i1>)
				declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

	; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine < %s -S \| FileCheck %s			; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -dce -instcombine < %s -S \| FileCheck %s

	; Test that we can add on the induction variable			; Test that we can add on the induction variable
	; for (long long i = 0; i < n; i++) {			; for (long long i = 0; i < n; i++) {
	; a[i] = b[i] + i;			; a[i] = b[i] + i;
	; }			; }
	; with an unroll factor (interleave count) of 2.			; with an unroll factor (interleave count) of 2.

	define void @add_ind64_unrolled(i64* noalias nocapture %a, i64* noalias nocapture readonly %b, i64 %n) {			define void @add_ind64_unrolled(i64* noalias nocapture %a, i64* noalias nocapture readonly %b, i64 %n) {
	▲ Show 20 Lines • Show All 219 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[CostModel][AArch64] Make loads/stores of <vscale x 1 x eltty> expensive.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 358621

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CostModel][AArch64] Make loads/stores of <vscale x 1 x eltty> expensive.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 358621

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll

llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

llvm/test/Analysis/CostModel/AArch64/sve-ldst.ll

llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

[CostModel][AArch64] Make loads/stores of <vscale x 1 x eltty> expensive.
ClosedPublic