Diff 281538

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Show First 20 Lines • Show All 307 Lines • ▼ Show 20 Lines	int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
assert(ISD && "Invalid opcode");		assert(ISD && "Invalid opcode");

// TODO: Allow non-throughput costs that aren't binary.		// TODO: Allow non-throughput costs that aren't binary.
auto AdjustCost = [&CostKind](int Cost) {		auto AdjustCost = [&CostKind](int Cost) {
if (CostKind != TTI::TCK_RecipThroughput)		if (CostKind != TTI::TCK_RecipThroughput)
return Cost == 0 ? 0 : 1;		return Cost == 0 ? 0 : 1;
return Cost;		return Cost;
};		};
		auto IsLegalFPType = [this](EVT VT) {
		EVT EltVT = VT.getScalarType();
		return (EltVT == MVT::f32 && ST->hasVFP2Base()) \|\|
		(EltVT == MVT::f64 && ST->hasFP64()) \|\|
		(EltVT == MVT::f16 && ST->hasFullFP16());
		};

EVT SrcTy = TLI->getValueType(DL, Src);		EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);		EVT DstTy = TLI->getValueType(DL, Dst);

if (!SrcTy.isSimple() \|\| !DstTy.isSimple())		if (!SrcTy.isSimple() \|\| !DstTy.isSimple())
return AdjustCost(		return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));		BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

// The extend of a load is free		// Extending masked load/Truncating masked stores is expensive because we
if (I && isa<LoadInst>(I->getOperand(0))) {		// currently don't split them. This means that we'll likely end up
		// loading/storing each element individually (hence the high cost).
		if ((ST->hasMVEIntegerOps() &&
		(Opcode == Instruction::Trunc \|\| Opcode == Instruction::ZExt \|\|
		Opcode == Instruction::SExt)) \|\|
		(ST->hasMVEFloatOps() &&
		(Opcode == Instruction::FPExt \|\| Opcode == Instruction::FPTrunc) &&
		IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
		if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
		dmgreenAuthorUnsubmitted Done Reply Inline Actions This should now be (CCH == Normal \|\| CCh == Masked) ? The costs wouldn't really be right for other types I don't think. We may need to fix those up later, but that can be done in a different patch. dmgreen: This should now be (CCH == Normal \|\| CCh == Masked) ? The costs wouldn't really be right for…
		return 2 * DstTy.getVectorNumElements() * ST->getMVEVectorCostFactor();

		// The extend of other kinds of load is free
		if (CCH == TTI::CastContextHint::Normal \|\|
		CCH == TTI::CastContextHint::Masked) {
		dmgreenAuthorUnsubmitted Done Reply Inline Actions Technically it's not that we can't split them, it's that we choose not to. Essentially we will have to end up paying the cost somewhere, and from a cost-modelling perspective, this is a good place to say that splitting them will be expensive, especially in a tail predicated loop where we won't be able to split a vctp sensibly. Also do we end up scalarize the masked load? That might change where we put the cost (we could just implement getMemoryOpCost and put the cost their instead). dmgreen: Technically it's not that we can't split them, it's that we choose not to. Essentially we will…
static const TypeConversionCostTblEntry LoadConversionTbl[] = {		static const TypeConversionCostTblEntry LoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},		{ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},		{ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},		{ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},		{ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},
{ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},		{ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},
{ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},		{ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},		{ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},
Show All 37 Lines	static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},		{ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
};		};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {		if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =		if (const auto *Entry =
ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,		ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))		DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());		return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}		}
}

// The truncate of a store is free. This is the mirror of extends above.		// The truncate of a store is free. This is the mirror of extends above.
if (I && I->hasOneUse() && isa<StoreInst>(*I->user_begin())) {		static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = {
static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},		{ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},		{ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},
{ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},		{ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},		{ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},		{ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},
{ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},		{ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},
};		};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {		if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =		if (const auto *Entry =
ConvertCostTableLookup(MVELoadConversionTbl, ISD,		ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
SrcTy.getSimpleVT(), DstTy.getSimpleVT()))		SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());		return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}		}

static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {		static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
{ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},		{ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},		{ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},
};		};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {		if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =		if (const auto *Entry =
ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,		ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
SrcTy.getSimpleVT(), DstTy.getSimpleVT()))		SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());		return AdjustCost(Entry->Cost * ST->getMVEVectorCostFactor());
}		}
}		}

// NEON vector operations that can extend their inputs.		// NEON vector operations that can extend their inputs.
if ((ISD == ISD::SIGN_EXTEND \|\| ISD == ISD::ZERO_EXTEND) &&		if ((ISD == ISD::SIGN_EXTEND \|\| ISD == ISD::ZERO_EXTEND) &&
I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {		I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
▲ Show 20 Lines • Show All 220 Lines • ▼ Show 20 Lines	int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
if (ISD == ISD::FP_ROUND \|\| ISD == ISD::FP_EXTEND) {		if (ISD == ISD::FP_ROUND \|\| ISD == ISD::FP_EXTEND) {
// As general rule, fp converts that were not matched above are scalarized		// As general rule, fp converts that were not matched above are scalarized
// and cost 1 vcvt for each lane, so long as the instruction is available.		// and cost 1 vcvt for each lane, so long as the instruction is available.
// If not it will become a series of function calls.		// If not it will become a series of function calls.
const int CallCost = getCallInstrCost(nullptr, Dst, {Src}, CostKind);		const int CallCost = getCallInstrCost(nullptr, Dst, {Src}, CostKind);
int Lanes = 1;		int Lanes = 1;
if (SrcTy.isFixedLengthVector())		if (SrcTy.isFixedLengthVector())
Lanes = SrcTy.getVectorNumElements();		Lanes = SrcTy.getVectorNumElements();
auto IsLegal = [this](EVT VT) {
EVT EltVT = VT.getScalarType();
return (EltVT == MVT::f32 && ST->hasVFP2Base()) \|\|
(EltVT == MVT::f64 && ST->hasFP64()) \|\|
(EltVT == MVT::f16 && ST->hasFullFP16());
};

if (IsLegal(SrcTy) && IsLegal(DstTy))		if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
return Lanes;		return Lanes;
else		else
return Lanes * CallCost;		return Lanes * CallCost;
}		}

// Scalar integer conversion costs.		// Scalar integer conversion costs.
static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {		static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.		// i16 -> i64 requires two dependent operations.
▲ Show 20 Lines • Show All 1,109 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/ARM/cast_ldst.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 1,846 Lines • ▼ Show 20 Lines
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
	;			;
	; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends'			; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends'
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
	Show All 22 Lines
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef			; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
	;			;
	; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_extends'			; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_extends'
	Show All 27 Lines
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef			; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
	;			;
	; CHECK-V8R-RECIP-LABEL: 'maskedload_extends'			; CHECK-V8R-RECIP-LABEL: 'maskedload_extends'
	▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864u = zext <2 x i8> %loadv2i8 to <2 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4816s = sext <4 x i8> %loadv4i8 to <4 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8832u = zext <8 x i8> %loadv8i8 to <8 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8864s = sext <8 x i8> %loadv8i8 to <8 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8864u = zext <8 x i8> %loadv8i8 to <8 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16816s = sext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16816u = zext <16 x i8> %loadv16i8 to <16 x i16>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16832s = sext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16832u = zext <16 x i8> %loadv16i8 to <16 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16864s = sext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16864u = zext <16 x i8> %loadv16i8 to <16 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21632s = sext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21632u = zext <2 x i16> %loadv2i16 to <2 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664s = sext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21664u = zext <2 x i16> %loadv2i16 to <2 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632s = sext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632u = zext <4 x i16> %loadv4i16 to <4 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v41664s = sext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v41664u = zext <4 x i16> %loadv4i16 to <4 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81632s = sext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81632u = zext <8 x i16> %loadv8i16 to <8 x i32>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81664s = sext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v81664u = zext <8 x i16> %loadv8i16 to <8 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264s = sext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
	;			;
	; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_extends'			; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_extends'
	; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)			; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
	; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)			; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
	; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)			; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
	; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)			; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
	; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)			; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
	▲ Show 20 Lines • Show All 253 Lines • ▼ Show 20 Lines
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2864 = trunc <2 x i64> undef to <2 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816 = trunc <4 x i16> undef to <4 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4832 = trunc <4 x i32> undef to <4 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4864 = trunc <4 x i64> undef to <4 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8816 = trunc <8 x i16> undef to <8 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8832 = trunc <8 x i32> undef to <8 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8864 = trunc <8 x i64> undef to <8 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16816 = trunc <16 x i16> undef to <16 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16832 = trunc <16 x i32> undef to <16 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v16864 = trunc <16 x i64> undef to <16 x i8>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21632 = trunc <2 x i32> undef to <2 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v21664 = trunc <2 x i64> undef to <2 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v41632 = trunc <4 x i32> undef to <4 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v41664 = trunc <4 x i64> undef to <4 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v81632 = trunc <8 x i32> undef to <8 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
	▲ Show 20 Lines • Show All 396 Lines • ▼ Show 20 Lines
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
	▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4 = fpext <16 x half> %loadv16f16 to <16 x float>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v5 = fpext <2 x half> %loadv2f16 to <2 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v6 = fpext <4 x half> %loadv4f16 to <4 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v7 = fpext <8 x half> %loadv8f16 to <8 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v8 = fpext <16 x half> %loadv16f16 to <16 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
	▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines
	; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)			; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
	; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)			; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
	; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)			; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
	; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef			; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
	;			;
	; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc'			; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc'
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
	; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)			; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
	▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines
	; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)			; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
	; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)			; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
	; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)			; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
	; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef			; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
	;			;
	; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc'			; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc'
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21664 = fptrunc <2 x double> undef to <2 x half>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v41632 = fptrunc <4 x float> undef to <4 x half>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v41664 = fptrunc <4 x double> undef to <4 x half>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81632 = fptrunc <8 x float> undef to <8 x half>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
	; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)			; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
	▲ Show 20 Lines • Show All 125 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reduces-vf.ll

This file was added.

				; RUN: opt < %s -mattr=+mve,+mve.fp -loop-vectorize -S \| FileCheck %s --check-prefixes=DEFAULT
				; RUN: opt < %s -mattr=+mve,+mve.fp -loop-vectorize -prefer-predicate-over-epilog -S \| FileCheck %s --check-prefixes=TAILPRED

				target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
				target triple = "thumbv8.1m.main-arm-none-eabi"

				; When TP is disabled, this test can vectorize with a VF of 16.
				; When TP is enabled, this test should vectorize with a VF of 8.
				;
				; DEFAULT: load <16 x i8>, <16 x i8>*
				; DEFAULT: sext <16 x i8> %{{.*}} to <16 x i16>
				; DEFAULT: add <16 x i16>
				; DEFAULT-NOT: llvm.masked.load
				; DEFAULT-NOT: llvm.masked.store
				;
				; TAILPRED: llvm.masked.load.v8i8.p0v8i8
				; TAILPRED: sext <8 x i8> %{{.*}} to <8 x i16>
				; TAILPRED: add <8 x i16>
				; TAILPRED: call void @llvm.masked.store.v8i8.p0v8i8
				; TAILPRED-NOT: load <16 x i8>, <16 x i8>*

				define i32 @tp_reduces_vf(i8* nocapture %0, i32 %1, i8** %input) {
				%3 = load i8, i8* %input, align 8
				%4 = sext i32 %1 to i64
				%5 = icmp eq i32 %1, 0
				br i1 %5, label %._crit_edge, label %.preheader47.preheader

				.preheader47.preheader:
				br label %.preheader47

				.preheader47:
				%.050 = phi i64 [ %54, %53 ], [ 0, %.preheader47.preheader ]
				br label %.preheader

				._crit_edge.loopexit:
				br label %._crit_edge

				._crit_edge:
				ret i32 0

				.preheader:
				%indvars.iv51 = phi i32 [ 1, %.preheader47 ], [ %indvars.iv.next52, %52 ]
				%6 = mul nuw nsw i32 %indvars.iv51, 320
				br label %7

				7:
				%indvars.iv = phi i32 [ 1, %.preheader ], [ %indvars.iv.next, %7 ]
				%8 = add nuw nsw i32 %6, %indvars.iv
				%9 = add nsw i32 %8, -320
				%10 = add nsw i32 %8, -321
				%11 = getelementptr inbounds i8, i8* %3, i32 %10
				%12 = load i8, i8* %11, align 1
				%13 = sext i8 %12 to i32
				%14 = getelementptr inbounds i8, i8* %3, i32 %9
				%15 = load i8, i8* %14, align 1
				%16 = sext i8 %15 to i32
				%17 = add nsw i32 %8, -319
				%18 = getelementptr inbounds i8, i8* %3, i32 %17
				%19 = load i8, i8* %18, align 1
				%20 = sext i8 %19 to i32
				%21 = add nsw i32 %8, -1
				%22 = getelementptr inbounds i8, i8* %3, i32 %21
				%23 = load i8, i8* %22, align 1
				%24 = sext i8 %23 to i32
				%25 = getelementptr inbounds i8, i8* %3, i32 %8
				%26 = load i8, i8* %25, align 1
				%27 = sext i8 %26 to i32
				%28 = mul nsw i32 %27, 255
				%29 = add nuw nsw i32 %8, 1
				%30 = getelementptr inbounds i8, i8* %3, i32 %29
				%31 = load i8, i8* %30, align 1
				%32 = sext i8 %31 to i32
				%33 = add nuw nsw i32 %8, 320
				%34 = add nuw nsw i32 %8, 319
				%35 = getelementptr inbounds i8, i8* %3, i32 %34
				%36 = load i8, i8* %35, align 1
				%37 = sext i8 %36 to i32
				%38 = getelementptr inbounds i8, i8* %3, i32 %33
				%39 = load i8, i8* %38, align 1
				%40 = sext i8 %39 to i32
				%41 = add nuw nsw i32 %8, 321
				%42 = getelementptr inbounds i8, i8* %3, i32 %41
				%43 = load i8, i8* %42, align 1
				%44 = sext i8 %43 to i32
				%reass.add = add nsw i32 %16, %13
				%reass.add44 = add nsw i32 %reass.add, %20
				%reass.add45 = add nsw i32 %reass.add44, %24
				%45 = add nsw i32 %reass.add45, %32
				%46 = add nsw i32 %45, %37
				%47 = add nsw i32 %46, %40
				%reass.add46 = add nsw i32 %47, %44
				%reass.mul = mul nsw i32 %reass.add46, -28
				%48 = add nsw i32 %reass.mul, %28
				%49 = lshr i32 %48, 8
				%50 = trunc i32 %49 to i8
				%51 = getelementptr inbounds i8, i8* %0, i32 %8
				store i8 %50, i8* %51, align 1
				%indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
				%exitcond = icmp eq i32 %indvars.iv.next, 319
				br i1 %exitcond, label %52, label %7

				52:
				%indvars.iv.next52 = add nuw nsw i32 %indvars.iv51, 1
				%exitcond53 = icmp eq i32 %indvars.iv.next52, 239
				br i1 %exitcond53, label %53, label %.preheader

				53:
				%54 = add nuw i64 %.050, 1
				%55 = icmp ult i64 %54, %4
				br i1 %55, label %.preheader47, label %._crit_edge.loopexit
				}
				dmgreenAuthorUnsubmitted Done Reply Inline Actions Can you try and cleanup the test. dmgreen: Can you try and cleanup the test.

This is an archive of the discontinued LLVM Phabricator instance.

[Target][ARM] Tune getCastInstrCost for extending masked loads and truncating masked stores
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 281538

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/test/Analysis/CostModel/ARM/cast_ldst.ll

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reduces-vf.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Target][ARM] Tune getCastInstrCost for extending masked loads and truncating masked storesClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 281538

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/test/Analysis/CostModel/ARM/cast_ldst.ll

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-reduces-vf.ll

[Target][ARM] Tune getCastInstrCost for extending masked loads and truncating masked stores
ClosedPublic