This is an archive of the discontinued LLVM Phabricator instance.

[ARM] MVE FP16 cost adjustments
ClosedPublic

Authored by dmgreen on Jun 14 2020, 2:38 PM.

Details

Summary

This adjusts the MVE fp16 cost model, similar to what we already do for integer casts. It uses the base cost of 1 per cvt for most fp extends/truncates, but adjusts it for loads and stores, where we know that an extending load has been used to get the values into the correct lanes, so that only a single MVE VCVTB is then needed.
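The idea in the summary can be sketched as follows. This is a hypothetical, heavily simplified standalone sketch of the costing rule described above, not the actual LLVM ARMTargetTransformInfo code: an fp16-to-fp32 vector extend normally pays the base cost of one convert per legalized step, but when the source is an extending vector load the half values already sit in the bottom of each 32-bit lane, so a single MVE VCVTB suffices. The enum names and element counts are illustrative assumptions.

```cpp
#include <algorithm>

// Illustrative only: whether the fpext's source was an extending load.
enum class SrcKind { Other, ExtendingLoad };

// Sketch of the adjusted cost for an fp16 -> fp32 vector extend.
int mveFP16ExtendCost(int numElements, SrcKind src) {
  if (src == SrcKind::ExtendingLoad)
    return 1; // one VCVTB converts the bottom half of each 32-bit lane

  // Base cost: one convert per legalized 4-element step,
  // e.g. an 8-element extend legalizes into two converts.
  return std::max(1, numElements / 4);
}
```

A truncating store would get the symmetric treatment, with a single VCVTB-style narrowing convert feeding the store.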

Diff Detail

Event Timeline

dmgreen created this revision.Jun 14 2020, 2:38 PM
dmgreen updated this revision to Diff 271746.Jun 18 2020, 9:26 AM

Rebase and add some additional trunc handling. This should not affect a lot, but it keeps a symmetry between extending loads and truncating stores.

There are a lot of changes here... would it be worth committing them as separate patches (MVE, NEON, and generic), or splitting off the memory and cast parts?

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
501

Maybe a comment on your magic number choices, or how about using getIntrinsicInstrCost for the call cost?

1004

I thought your stance was that we shouldn't be looking at the context?!

dmgreen marked 2 inline comments as done.Jun 24 2020, 5:55 AM

Yeah, I can split this up. It did grow a bit. The parts are sometimes interrelated but we now have D82456, D82457 and D82458. And I will rebase this to the remaining fp16 parts.

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
501

getCallInstrCost looks like it might work - at least it gives the right cost. It will not include the same codesize changes, but that can be done separately. It's hard to say exactly what the call will cost, given all the stuff that might go on inside it.

1004

No. It's more... nuanced than that. We need to look at context in order to model things accurately, at least at the moment. Without this bit of code, for example, all <4 x f16> converts would be quite expensive, so you end up with fp16->fp32 converts not being vectorized at all, even when it would be beneficial.

But only certain parts of the context are valid to look at. The opcode of the surrounding instructions is almost certainly OK, and in this case I would claim that the _scalar_ type of the float makes sense, because it is converting a float (not an int). The absolute vector type could be wrong, because the vectorizer may query the cost with a different type than appears in the IR (with a different number of vector elements). If it were an integer type, that could be wrong too, because the vectorizer can promote types to a smaller bitwidth as it vectorizes.
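The "safe context" rule above can be sketched like this. This is a hypothetical standalone illustration, not the actual LLVM code: when costing an fpext, it inspects only the opcode of the source instruction and the scalar element type, and deliberately ignores the vector width, since the vectorizer may query the cost at a different width than the one in the IR. All type and struct names here are made up for the example.

```cpp
// Illustrative stand-ins for an instruction's opcode and element type.
enum class Opcode { Load, FPExt, Other };
enum class ScalarTy { F16, F32, I16, Other };

struct Instr {
  Opcode op;
  ScalarTy scalarTy; // scalar element type only; vector width is ignored
};

// Returns true if the cheap single-VCVTB pattern applies: an fpext whose
// source is a load of f16 elements. Only the opcode and scalar type are
// consulted, per the rule described in the comment above.
bool isCheapFP16ExtendPattern(const Instr &FPExt, const Instr &Src) {
  return FPExt.op == Opcode::FPExt && Src.op == Opcode::Load &&
         Src.scalarTy == ScalarTy::F16;
}
```

An integer extending load deliberately does not match, since integer types may have been promoted by the vectorizer and are not safe to reason about this way.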

I'm not saying it's a great design and would love a better way to do this, but it's the one we've currently got.

dmgreen updated this revision to Diff 273006.Jun 24 2020, 6:02 AM
dmgreen edited the summary of this revision. (Show Details)

Rebase onto other patches, leaving the FP16 part remaining here.

dmgreen updated this revision to Diff 273015.Jun 24 2020, 7:01 AM

Rebase properly onto tests. They were previously coming through twice.

samparker accepted this revision.Jun 29 2020, 7:50 AM
samparker added inline comments.
llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
564–565

Looks like these TODOs can now be removed.

This revision is now accepted and ready to land.Jun 29 2020, 7:50 AM
This revision was automatically updated to reflect the committed changes.