Diff 333803

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 385 Lines • ▼ Show 20 Lines	int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type Dst, Type Src,

static const TypeConversionCostTblEntry		static const TypeConversionCostTblEntry
ConversionTbl[] = {		ConversionTbl[] = {
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },		{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },		{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },		{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },		{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },

		// Truncations on nxvmiN
		{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
		david-armUnsubmitted Done Reply Inline Actions I'm not sure that these instructions are 'free', i.e. a cost of 0. For SVE, truncation usually involves a cmpXX instruction to generate a predicate, e.g. cmpne p1.d, p0/z, z0.d, z1.d. I think we probably want at least a cost of 1 here. david-arm: I'm not sure that these instructions are 'free', i.e. a cost of 0. For SVE truncation usually…
		{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i32, 1 },
		{ ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i64, 1 },
		{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i16, 1 },
		{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i32, 1 },
		{ ISD::TRUNCATE, MVT::nxv4i1, MVT::nxv4i64, 2 },
		{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i16, 1 },
		{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i32, 3 },
		{ ISD::TRUNCATE, MVT::nxv8i1, MVT::nxv8i64, 5 },
		{ ISD::TRUNCATE, MVT::nxv2i16, MVT::nxv2i32, 1 },
		{ ISD::TRUNCATE, MVT::nxv2i32, MVT::nxv2i64, 1 },
		{ ISD::TRUNCATE, MVT::nxv4i16, MVT::nxv4i32, 1 },
		{ ISD::TRUNCATE, MVT::nxv4i32, MVT::nxv4i64, 2 },
		{ ISD::TRUNCATE, MVT::nxv8i16, MVT::nxv8i32, 3 },
		{ ISD::TRUNCATE, MVT::nxv8i32, MVT::nxv8i64, 6 },

// The number of shll instructions for the extension.		// The number of shll instructions for the extension.
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines	ConversionTbl[] = {
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },		{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },

// Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2		// Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },		{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },		{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },		{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },		{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },

		// Lowering scalable
		{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
		{ ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
		{ ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f64, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f32, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f32, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f64, 1 },


		// Complex, from nxv2f32 legal type is nxv2i32 (no cost) or nxv2i64 (1 ext)
		{ ISD::FP_TO_SINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
		{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f32, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv2i64, MVT::nxv2f32, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f32, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f32, 1 },

// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.		// Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },		{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },		{ ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },		{ ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },		{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },		{ ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },		{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },

		// Complex, from nxv2f64: legal type is nxv2i32, 1 narrowing => ~2.
		david-armUnsubmitted Done Reply Inline Actions nit: I think this should be: // From nxvmf32 to nxvmf64 david-arm: nit: I think this should be: // From nxvmf32 to nxvmf64
		{ ISD::FP_TO_SINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv2i8, MVT::nxv2f64, 2 },
		david-armUnsubmitted Done Reply Inline Actions Are we missing other types/combinations here too? For example: { ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 }, and { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 }, david-arm: Are we missing other types/combinations here too? For example: { ISD::FP_ROUND, MVT::nxv2f16…
		david-armUnsubmitted Done Reply Inline Actions I think any conversions that involve illegal types that are too large for a single register will be split up into multiple instructions. For example, nxv8f32 is twice the size of a normal SVE register, which means we actually need 2 instructions that convert nxv4f32 -> nxv4f16. Then, we need a final third instruction to interleave these two results together. I'd expect something a bit like: { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 }, (2 converts + interleave) ... { ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 }, (2 converts + interleave) ... { ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 }, (4 converts + 3 interleaves) ... { ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 }, (2 converts + interleave) ... { ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 }, (4 converts + 2 interleaves) It's worth pointing out that these costs are just estimates - the most important thing is the costs should be higher to reflect the increased complexity of the operation. david-arm: I think any conversions that involve illegal types that are too large for a single register…
		{ ISD::FP_TO_UINT, MVT::nxv2i32, MVT::nxv2f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv2i16, MVT::nxv2f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv2i8, MVT::nxv2f64, 2 },

		// Complex, from nxv4f32 legal type is nxv4i16, 1 narrowing => ~2
		{ ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f32, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f32, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f32, 2 },

		// Complex, from nxv8f64: legal type is nxv8i32, 1 narrowing => ~2.
		{ ISD::FP_TO_SINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv8i32, MVT::nxv8f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f64, 2 },

		// Complex, from nxv4f64: legal type is nxv4i32, 1 narrowing => ~2.
		{ ISD::FP_TO_SINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv4i8, MVT::nxv4f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv4i32, MVT::nxv4f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv4i16, MVT::nxv4f64, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv4i8, MVT::nxv4f64, 2 },

		// Complex, from nxv8f32: legal type is nxv8i32 (no cost) or nxv8i64 (1 ext).
		{ ISD::FP_TO_SINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
		{ ISD::FP_TO_SINT, MVT::nxv8i16, MVT::nxv8f32, 3 },
		david-armUnsubmitted Done Reply Inline Actions I think for conversions from nxv8f32->nxv8i16 there are two instructions + interleaving required here - so maybe a cost of 3? david-arm: I think for conversions from nxv8f32->nxv8i16 there are two instructions + interleaving…
		{ ISD::FP_TO_SINT, MVT::nxv8i8, MVT::nxv8f32, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv8i64, MVT::nxv8f32, 2 },
		{ ISD::FP_TO_UINT, MVT::nxv8i16, MVT::nxv8f32, 1 },
		{ ISD::FP_TO_UINT, MVT::nxv8i8, MVT::nxv8f32, 1 },

		// Truncate from nxvmf32 to nxvmf16.
		{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f32, 1 },
		{ ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1 },
		{ ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3 },

		// Truncate from nxvmf64 to nxvmf16.
		{ ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1 },
		{ ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3 },
		{ ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7 },

		// Truncate from nxvmf64 to nxvmf32.
		{ ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1 },
		{ ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3 },
		{ ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 6 },

		// Extend from nxvmf16 to nxvmf32.
		{ ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
		{ ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
		{ ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},

		// Extend from nxvmf16 to nxvmf64.
		{ ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
		{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
		{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},

		// Extend from nxvmf32 to nxvmf64.
		{ ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
		{ ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
		{ ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},

};		};

if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,		if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
DstTy.getSimpleVT(),		DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))		SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);		return AdjustCost(Entry->Cost);

return AdjustCost(		return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));		BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}		}

int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,		int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy,		VectorType *VecTy,
unsigned Index) {		unsigned Index) {

// Make sure we were given a valid extend opcode.		// Make sure we were given a valid extend opcode.
assert((Opcode == Instruction::SExt \|\| Opcode == Instruction::ZExt) &&		assert((Opcode == Instruction::SExt \|\| Opcode == Instruction::ZExt) &&
"Invalid opcode");		"Invalid opcode");
		sdesmalenUnsubmitted Done Reply Inline Actions Please avoid changing the formatting of the table, because that makes this diff unnecessarily big. sdesmalen: Please avoid changing the formatting of the table, because that makes this diff unnecessarily…
		nashermAuthorUnsubmitted Done Reply Inline Actions I believe this was a clang-format change. nasherm: I believe this was a clang-format change.
		david-armUnsubmitted Not Done Reply Inline Actions Hi @nasherm, yeah unfortunately it does happen on my patches too. Typically we try to avoid reformatting large tables like this or large switch-case statements that have a pre-existing format. david-arm: Hi @nasherm, yeah unfortunately it does happen on my patches too. Typically we try to avoid…

// We are extending an element we extract from a vector, so the source type		// We are extending an element we extract from a vector, so the source type
// of the extend is the element type of the vector.		// of the extend is the element type of the vector.
auto *Src = VecTy->getElementType();		auto *Src = VecTy->getElementType();

// Sign- and zero-extends are for integer types only.		// Sign- and zero-extends are for integer types only.
assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");		assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");

▲ Show 20 Lines • Show All 800 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll

This file was added.

				; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t \| FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning
				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-unknown-linux-gnu"

				define void @sve_fpext() {
				;CHECK-LABEL: 'sve_fpext'
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
				CarolineConcattoUnsubmitted Done Reply Inline Actions I believe you can remove the space before ; and I think you can use CHECK-NEXT CarolineConcatto: I believe you can remove the space before ; and I think you can use CHECK-NEXT
				david-armUnsubmitted Not Done Reply Inline Actions nit: Can you address @CarolineConcatto's comment before merging? For example, I think all these tests just need a simple sed replace: sed -i 's/ ;CHECK/; CHECK/g' david-arm: nit: Can you address @CarolineConcatto's comment before merging? For example, I think all these…
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>
				%nxv2_f16_to_f32 = fpext <vscale x 2 x half> undef to <vscale x 2 x float>
				%nxv4_f16_to_f32 = fpext <vscale x 4 x half> undef to <vscale x 4 x float>
				CarolineConcattoUnsubmitted Done Reply Inline Actions Also add space between the last CHECK and the first instruction. CarolineConcatto: Also add space between the last CHECK and the first instruction.
				%nxv8_f16_to_f32 = fpext <vscale x 8 x half> undef to <vscale x 8 x float>

				%nxv2_f16_to_f64 = fpext <vscale x 2 x half> undef to <vscale x 2 x double>
				%nxv4_f16_to_f64 = fpext <vscale x 4 x half> undef to <vscale x 4 x double>
				%nxv8_f16_to_f64 = fpext <vscale x 8 x half> undef to <vscale x 8 x double>

				%nxv2_f32_to_f64 = fpext <vscale x 2 x float> undef to <vscale x 2 x double>
				%nxv4_f32_to_f64 = fpext <vscale x 4 x float> undef to <vscale x 4 x double>
				%nxv8_f32_to_f64 = fpext <vscale x 8 x float> undef to <vscale x 8 x double>

				ret void
				}

llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll

This file was added.

				; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -o - -S < %s 2>%t \| FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning
				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-unknown-linux-gnu"

				; Cost-model expectations for fptosi/fptoui on scalable FP vectors.
				; Each expectation line below names one conversion from the function body and
				; pins the estimated cost that the opt invocation at the top of this file
				; reports for it. The expectations are listed in the same order as the
				; instructions they describe.
				; NOTE(review): only i8, i32 and i64 destination element types are exercised
				; here; no conversion to an i16 element type appears in this function.
				define void @sve-fptoi() {
				;CHECK-LABEL: 'sve-fptoi'
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>

				; Source <vscale x 2 x float>: signed and unsigned conversion to i8, i32, i64.
				%nv2f32_to_si8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
				%nv2f32_to_ui8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
				%nv2f32_to_si32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
				%nv2f32_to_ui32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
				%nv2f32_to_si64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
				%nv2f32_to_ui64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>

				; Source <vscale x 2 x double>: signed and unsigned conversion to i8, i32, i64.
				%nv2f64_to_si8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
				%nv2f64_to_ui8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
				%nv2f64_to_si32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
				%nv2f64_to_ui32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
				%nv2f64_to_si64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
				%nv2f64_to_ui64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>

				; Source <vscale x 4 x float>: signed and unsigned conversion to i8, i32, i64.
				%nv4f32_to_si8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
				%nv4f32_to_ui8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
				%nv4f32_to_si32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
				%nv4f32_to_ui32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
				%nv4f32_to_si64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
				%nv4f32_to_ui64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>

				; Source <vscale x 4 x double>: signed and unsigned conversion to i8, i32, i64.
				%nv4f64_to_si8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
				%nv4f64_to_ui8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
				%nv4f64_to_si32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
				%nv4f64_to_ui32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
				%nv4f64_to_si64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
				%nv4f64_to_ui64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>

				; Source <vscale x 8 x float>: signed and unsigned conversion to i8, i32, i64.
				%nv8f32_to_si8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
				%nv8f32_to_ui8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
				%nv8f32_to_si32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
				%nv8f32_to_ui32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
				%nv8f32_to_si64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
				%nv8f32_to_ui64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>

				; Source <vscale x 8 x double>: signed and unsigned conversion to i8, i32, i64.
				%nv8f64_to_si8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
				%nv8f64_to_ui8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
				%nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
				%nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
				%nv8f64_to_si64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
				%nv8f64_to_ui64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>

				ret void
				}

llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll

This file was added.

				; RUN: opt -cost-model -analyze -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>%t\| FileCheck %s
				; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

				; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
				; WARN-NOT: warning
				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-unknown-linux-gnu"

				; Cost-model expectations for fptrunc on scalable FP vectors.
				; Each expectation line below names one fptrunc from the function body and
				; pins the estimated cost that the opt invocation at the top of this file
				; reports for it. The expectations are listed in the same order as the
				; instructions they describe.
				define void @sve_fptruncs() {
				;CHECK-LABEL: 'sve_fptruncs'
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>
				; float -> half, for <vscale x 2>, <vscale x 4> and <vscale x 8> vectors.
				%nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x half>
				%nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x half>
				%nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x half>

				; double -> half, for <vscale x 2>, <vscale x 4> and <vscale x 8> vectors.
				%nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x half>
				%nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x half>
				%nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x half>

				; double -> float, for <vscale x 2>, <vscale x 4> and <vscale x 8> vectors.
				%nxv2_f32_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x float>
				%nxv4_f32_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x float>
				%nxv8_f32_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x float>

				ret void
				}

llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll

	; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -cost-model -analyze < %s 2>%t \| FileCheck %s			; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -cost-model -analyze < %s 2>%t \| FileCheck %s
	; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t			; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

	; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.			; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
	; WARN-NOT: warning			; WARN-NOT: warning

	; CHECK: Found an estimated cost of 0 for instruction: %0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>			define void @sve_truncs() {
				;CHECK-LABEL: 'sve_truncs'
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
				;CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>
				%trunc_v2i16_to_i1 = trunc <vscale x 2 x i16> undef to <vscale x 2 x i1>
				%trunc_v2i32_to_i1 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i1>
				%trunc_v2i64_to_i1 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i1>

				%trunc_v4i16_to_i1 = trunc <vscale x 4 x i16> undef to <vscale x 4 x i1>
				%trunc_v4i32_to_i1 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i1>
				%trunc_v4i64_to_i1 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i1>

				%trunc_v8i16_to_i1 = trunc <vscale x 8 x i16> undef to <vscale x 8 x i1>
				%trunc_v8i32_to_i1 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i1>
				%trunc_v8i64_to_i1 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i1>

				%trunc_v2i32_to_i16 = trunc <vscale x 2 x i32> undef to <vscale x 2 x i16>
				%trunc_v2i64_to_i32 = trunc <vscale x 2 x i64> undef to <vscale x 2 x i32>

				%trunc_v4i32_to_i16 = trunc <vscale x 4 x i32> undef to <vscale x 4 x i16>
				%trunc_v4i64_to_i32 = trunc <vscale x 4 x i64> undef to <vscale x 4 x i32>

				%trunc_v8i32_to_i16 = trunc <vscale x 8 x i32> undef to <vscale x 8 x i16>
				%trunc_v8i64_to_i32 = trunc <vscale x 8 x i64> undef to <vscale x 8 x i32>

	define void @trunc_nxv2i64_to_nxv2i32(<vscale x 2 x i32>* %ptr, <vscale x 2 x i64> %v) {
	entry:
	%0 = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
	store <vscale x 2 x i32> %0, <vscale x 2 x i32>* %ptr
	ret void			ret void
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CostModel] Add instruction cost for operations on scalable vectors
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 333803

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll

llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll

llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll

llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CostModel] Add instruction cost for operations on scalable vectors
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 333803

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll

llvm/test/Analysis/CostModel/AArch64/sve-fptoi.ll

llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll

llvm/test/Analysis/CostModel/AArch64/sve-trunc.ll

[SVE][CostModel] Add instruction cost for operations on scalable vectors
ClosedPublic