Diff 152443

llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Show First 20 Lines • Show All 931 Lines • ▼ Show 20 Lines	int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))		if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
return LT.first * Entry->Cost;		return LT.first * Entry->Cost;

return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);		return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
}		}

int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,		int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {		Type *SubTp) {
		if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
// Transpose shuffle kinds can be performed with 'trn1/trn2' and 'zip1/zip2'		Kind == TTI::SK_PermuteSingleSrc) {
// instructions.		static const CostTblEntry ShuffleTbl[] = {
if (Kind == TTI::SK_Transpose) {		// Transpose shuffle kinds can be performed with 'trn1/trn2' and
static const CostTblEntry TransposeTbl[] = {		// 'zip1/zip2' instructions.
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},		{ TTI::SK_Transpose, MVT::v8i8, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},		{ TTI::SK_Transpose, MVT::v16i8, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},		{ TTI::SK_Transpose, MVT::v4i16, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},		{ TTI::SK_Transpose, MVT::v8i16, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},		{ TTI::SK_Transpose, MVT::v2i32, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},		{ TTI::SK_Transpose, MVT::v4i32, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},		{ TTI::SK_Transpose, MVT::v2i64, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},		{ TTI::SK_Transpose, MVT::v2f32, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},		{ TTI::SK_Transpose, MVT::v4f32, 1 },
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},		{ TTI::SK_Transpose, MVT::v2f64, 1 },
		// Select shuffle kinds.
		// TODO: handle vXi8/vXi16.
		{ TTI::SK_Select, MVT::v2i32, 1 }, // mov.
		{ TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
		{ TTI::SK_Select, MVT::v2i64, 1 }, // mov.
		{ TTI::SK_Select, MVT::v2f32, 1 }, // mov.
		{ TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
		{ TTI::SK_Select, MVT::v2f64, 1 }, // mov.
		// PermuteSingleSrc shuffle kinds.
		// TODO: handle vXi8/vXi16.
		{ TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
		{ TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
		{ TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
		{ TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
		{ TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
		{ TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
};		};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);		std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =		if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
CostTableLookup(TransposeTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;		return LT.first * Entry->Cost;
}		}

return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);		return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}		}

llvm/trunk/test/Analysis/CostModel/AArch64/shuffle-select.ll

	Show All 33 Lines
	; CODE-LABEL: sel.v8i16			; CODE-LABEL: sel.v8i16
	; CODE: tbl v0.16b, { v0.16b, v1.16b }, v2.16b			; CODE: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
	define <8 x i16> @sel.v8i16(<8 x i16> %v0, <8 x i16> %v1) {			define <8 x i16> @sel.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
	%tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>			%tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
	ret <8 x i16> %tmp0			ret <8 x i16> %tmp0
	}			}

	; COST-LABEL: sel.v2i32			; COST-LABEL: sel.v2i32
	; COST: Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>			; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
	; CODE-LABEL: sel.v2i32			; CODE-LABEL: sel.v2i32
	; CODE: mov v0.s[1], v1.s[1]			; CODE: mov v0.s[1], v1.s[1]
	define <2 x i32> @sel.v2i32(<2 x i32> %v0, <2 x i32> %v1) {			define <2 x i32> @sel.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
	%tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>			%tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 3>
	ret <2 x i32> %tmp0			ret <2 x i32> %tmp0
	}			}

	; COST-LABEL: sel.v4i32			; COST-LABEL: sel.v4i32
	; COST: Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>			; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
	; CODE-LABEL: sel.v4i32			; CODE-LABEL: sel.v4i32
	; CODE: rev64 v0.4s, v0.4s			; CODE: rev64 v0.4s, v0.4s
	; CODE: trn2 v0.4s, v0.4s, v1.4s			; CODE: trn2 v0.4s, v0.4s, v1.4s
	define <4 x i32> @sel.v4i32(<4 x i32> %v0, <4 x i32> %v1) {			define <4 x i32> @sel.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
	%tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>			%tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
	ret <4 x i32> %tmp0			ret <4 x i32> %tmp0
	}			}

	; COST-LABEL: sel.v2i64			; COST-LABEL: sel.v2i64
	; COST: Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>			; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
	; CODE-LABEL: sel.v2i64			; CODE-LABEL: sel.v2i64
	; CODE: mov v0.d[1], v1.d[1]			; CODE: mov v0.d[1], v1.d[1]
	define <2 x i64> @sel.v2i64(<2 x i64> %v0, <2 x i64> %v1) {			define <2 x i64> @sel.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
	%tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>			%tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 3>
	ret <2 x i64> %tmp0			ret <2 x i64> %tmp0
	}			}

	; COST-LABEL: sel.v2f32			; COST-LABEL: sel.v2f32
	; COST: Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>			; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
	; CODE-LABEL: sel.v2f32			; CODE-LABEL: sel.v2f32
	; CODE: mov v0.s[1], v1.s[1]			; CODE: mov v0.s[1], v1.s[1]
	define <2 x float> @sel.v2f32(<2 x float> %v0, <2 x float> %v1) {			define <2 x float> @sel.v2f32(<2 x float> %v0, <2 x float> %v1) {
	%tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>			%tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 3>
	ret <2 x float> %tmp0			ret <2 x float> %tmp0
	}			}

	; COST-LABEL: sel.v4f32			; COST-LABEL: sel.v4f32
	; COST: Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>			; COST: Found an estimated cost of 2 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
	; CODE-LABEL: sel.v4f32			; CODE-LABEL: sel.v4f32
	; CODE: rev64 v0.4s, v0.4s			; CODE: rev64 v0.4s, v0.4s
	; CODE: trn2 v0.4s, v0.4s, v1.4s			; CODE: trn2 v0.4s, v0.4s, v1.4s
	define <4 x float> @sel.v4f32(<4 x float> %v0, <4 x float> %v1) {			define <4 x float> @sel.v4f32(<4 x float> %v0, <4 x float> %v1) {
	%tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>			%tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
	ret <4 x float> %tmp0			ret <4 x float> %tmp0
	}			}

	; COST-LABEL: sel.v2f64			; COST-LABEL: sel.v2f64
	; COST: Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>			; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
	; CODE-LABEL: sel.v2f64			; CODE-LABEL: sel.v2f64
	; CODE: mov v0.d[1], v1.d[1]			; CODE: mov v0.d[1], v1.d[1]
	define <2 x double> @sel.v2f64(<2 x double> %v0, <2 x double> %v1) {			define <2 x double> @sel.v2f64(<2 x double> %v0, <2 x double> %v1) {
	%tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>			%tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 3>
	ret <2 x double> %tmp0			ret <2 x double> %tmp0
	}			}

llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll

Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	;
%tmp2.1 = add i64 %tmp1.0, %tmp1.1		%tmp2.1 = add i64 %tmp1.0, %tmp1.1
store i64 %tmp2.0, i64* %c.0, align 8		store i64 %tmp2.0, i64* %c.0, align 8
store i64 %tmp2.1, i64* %c.1, align 8		store i64 %tmp2.1, i64* %c.1, align 8
ret void		ret void
}		}

define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {		define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32(		; CHECK-LABEL: @build_vec_v4i32(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <4 x i32> %v0, i32 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <4 x i32> %v0, i32 1		; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[V0_2:%.*]] = extractelement <4 x i32> %v0, i32 2		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[V0_3:%.*]] = extractelement <4 x i32> %v0, i32 3		; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[V1_0:%.*]] = extractelement <4 x i32> %v1, i32 0		; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <4 x i32> %v1, i32 1		; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[V1_2:%.*]] = extractelement <4 x i32> %v1, i32 2		; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[V1_3:%.*]] = extractelement <4 x i32> %v1, i32 3		; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]]		; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]		; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP0_2:%.*]] = add i32 [[V0_2]], [[V1_2]]		; CHECK-NEXT: [[SHUFFLE3:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[TMP0_3:%.*]] = add i32 [[V0_3]], [[V1_3]]		; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[SHUFFLE2]], [[SHUFFLE3]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V1_0]]		; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[SHUFFLE2]], [[SHUFFLE3]]
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i32 [[V0_1]], [[V1_1]]		; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP1_2:%.*]] = sub i32 [[V0_2]], [[V1_2]]		; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP5]], [[TMP10]]
; CHECK-NEXT: [[TMP1_3:%.*]] = sub i32 [[V0_3]], [[V1_3]]		; CHECK-NEXT: ret <4 x i32> [[TMP11]]
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP0_1]]
; CHECK-NEXT: [[TMP2_1:%.*]] = add i32 [[TMP1_0]], [[TMP1_1]]
; CHECK-NEXT: [[TMP2_2:%.*]] = add i32 [[TMP0_2]], [[TMP0_3]]
; CHECK-NEXT: [[TMP2_3:%.*]] = add i32 [[TMP1_2]], [[TMP1_3]]
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2_0]], i32 0
; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <4 x i32> [[TMP3_0]], i32 [[TMP2_1]], i32 1
; CHECK-NEXT: [[TMP3_2:%.*]] = insertelement <4 x i32> [[TMP3_1]], i32 [[TMP2_2]], i32 2
; CHECK-NEXT: [[TMP3_3:%.*]] = insertelement <4 x i32> [[TMP3_2]], i32 [[TMP2_3]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
;		;
%v0.0 = extractelement <4 x i32> %v0, i32 0		%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1		%v0.1 = extractelement <4 x i32> %v0, i32 1
%v0.2 = extractelement <4 x i32> %v0, i32 2		%v0.2 = extractelement <4 x i32> %v0, i32 2
%v0.3 = extractelement <4 x i32> %v0, i32 3		%v0.3 = extractelement <4 x i32> %v0, i32 3
%v1.0 = extractelement <4 x i32> %v1, i32 0		%v1.0 = extractelement <4 x i32> %v1, i32 0
%v1.1 = extractelement <4 x i32> %v1, i32 1		%v1.1 = extractelement <4 x i32> %v1, i32 1
%v1.2 = extractelement <4 x i32> %v1, i32 2		%v1.2 = extractelement <4 x i32> %v1, i32 2
Show All 14 Lines	;
%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1		%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1
%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.2, i32 2		%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.2, i32 2
%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.3, i32 3		%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.3, i32 3
ret <4 x i32> %tmp3.3		ret <4 x i32> %tmp3.3
}		}

define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {		define <4 x i32> @build_vec_v4i32_reuse_0(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32_reuse_0(		; CHECK-LABEL: @build_vec_v4i32_reuse_0(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <2 x i32> %v0, i32 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i32> %v0, i32 1		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[V1_0:%.*]] = extractelement <2 x i32> %v1, i32 0		; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i32> %v1, i32 1		; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]]		; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]		; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> %v0, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V1_0]]		; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i32 [[V0_1]], [[V1_1]]		; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP0_1]]		; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP2_1:%.*]] = add i32 [[TMP1_0]], [[TMP1_1]]		; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2_0]], i32 0		; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i32> [[TMP5]], [[TMP10]]
; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <4 x i32> [[TMP3_0]], i32 [[TMP2_1]], i32 1		; CHECK-NEXT: [[TMP3_3:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP3_2:%.*]] = insertelement <4 x i32> [[TMP3_1]], i32 [[TMP2_0]], i32 2
; CHECK-NEXT: [[TMP3_3:%.*]] = insertelement <4 x i32> [[TMP3_2]], i32 [[TMP2_1]], i32 3
; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]		; CHECK-NEXT: ret <4 x i32> [[TMP3_3]]
;		;
%v0.0 = extractelement <2 x i32> %v0, i32 0		%v0.0 = extractelement <2 x i32> %v0, i32 0
%v0.1 = extractelement <2 x i32> %v0, i32 1		%v0.1 = extractelement <2 x i32> %v0, i32 1
%v1.0 = extractelement <2 x i32> %v1, i32 0		%v1.0 = extractelement <2 x i32> %v1, i32 0
%v1.1 = extractelement <2 x i32> %v1, i32 1		%v1.1 = extractelement <2 x i32> %v1, i32 1
%tmp0.0 = add i32 %v0.0, %v1.0		%tmp0.0 = add i32 %v0.0, %v1.0
%tmp0.1 = add i32 %v0.1, %v1.1		%tmp0.1 = add i32 %v0.1, %v1.1
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	;
%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1		%tmp3.1 = insertelement <4 x i32> %tmp3.0, i32 %tmp2.1, i32 1
%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.2, i32 2		%tmp3.2 = insertelement <4 x i32> %tmp3.1, i32 %tmp2.2, i32 2
%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.3, i32 3		%tmp3.3 = insertelement <4 x i32> %tmp3.2, i32 %tmp2.3, i32 3
ret <4 x i32> %tmp3.3		ret <4 x i32> %tmp3.3
}		}

define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {		define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @reduction_v4i32(		; CHECK-LABEL: @reduction_v4i32(
; CHECK-NEXT: [[V0_0:%.*]] = extractelement <4 x i32> %v0, i32 0		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[V0_1:%.*]] = extractelement <4 x i32> %v0, i32 1		; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[V0_2:%.*]] = extractelement <4 x i32> %v0, i32 2		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[V0_3:%.*]] = extractelement <4 x i32> %v0, i32 3		; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[V1_0:%.*]] = extractelement <4 x i32> %v1, i32 0		; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[V1_1:%.*]] = extractelement <4 x i32> %v1, i32 1		; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[V1_2:%.*]] = extractelement <4 x i32> %v1, i32 2		; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[V1_3:%.*]] = extractelement <4 x i32> %v1, i32 3		; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V1_0]]		; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[TMP0_1:%.*]] = add i32 [[V0_1]], [[V1_1]]		; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[TMP0_2:%.*]] = add i32 [[V0_2]], [[V1_2]]		; CHECK-NEXT: [[SHUFFLE3:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[TMP0_3:%.*]] = add i32 [[V0_3]], [[V1_3]]		; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> [[SHUFFLE2]], [[SHUFFLE3]]
; CHECK-NEXT: [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V1_0]]		; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[SHUFFLE2]], [[SHUFFLE3]]
; CHECK-NEXT: [[TMP1_1:%.*]] = sub i32 [[V0_1]], [[V1_1]]		; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[TMP1_2:%.*]] = sub i32 [[V0_2]], [[V1_2]]		; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP5]], [[TMP10]]
; CHECK-NEXT: [[TMP1_3:%.*]] = sub i32 [[V0_3]], [[V1_3]]		; CHECK-NEXT: [[TMP12:%.*]] = lshr <4 x i32> [[TMP11]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP0_1]]		; CHECK-NEXT: [[TMP13:%.*]] = and <4 x i32> [[TMP12]], <i32 65537, i32 65537, i32 65537, i32 65537>
; CHECK-NEXT: [[TMP2_1:%.*]] = add i32 [[TMP1_0]], [[TMP1_1]]		; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <4 x i32> [[TMP13]], <i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT: [[TMP2_2:%.*]] = add i32 [[TMP0_2]], [[TMP0_3]]		; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP11]]
; CHECK-NEXT: [[TMP2_3:%.*]] = add i32 [[TMP1_2]], [[TMP1_3]]		; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP3_0:%.*]] = lshr i32 [[TMP2_0]], 15		; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP16]])
; CHECK-NEXT: [[TMP3_1:%.*]] = lshr i32 [[TMP2_1]], 15		; CHECK-NEXT: ret i32 [[TMP17]]
; CHECK-NEXT: [[TMP3_2:%.*]] = lshr i32 [[TMP2_2]], 15
; CHECK-NEXT: [[TMP3_3:%.*]] = lshr i32 [[TMP2_3]], 15
; CHECK-NEXT: [[TMP4_0:%.*]] = and i32 [[TMP3_0]], 65537
; CHECK-NEXT: [[TMP4_1:%.*]] = and i32 [[TMP3_1]], 65537
; CHECK-NEXT: [[TMP4_2:%.*]] = and i32 [[TMP3_2]], 65537
; CHECK-NEXT: [[TMP4_3:%.*]] = and i32 [[TMP3_3]], 65537
; CHECK-NEXT: [[TMP5_0:%.*]] = mul nuw i32 [[TMP4_0]], 65535
; CHECK-NEXT: [[TMP5_1:%.*]] = mul nuw i32 [[TMP4_1]], 65535
; CHECK-NEXT: [[TMP5_2:%.*]] = mul nuw i32 [[TMP4_2]], 65535
; CHECK-NEXT: [[TMP5_3:%.*]] = mul nuw i32 [[TMP4_3]], 65535
; CHECK-NEXT: [[TMP6_0:%.*]] = add i32 [[TMP5_0]], [[TMP2_0]]
; CHECK-NEXT: [[TMP6_1:%.*]] = add i32 [[TMP5_1]], [[TMP2_1]]
; CHECK-NEXT: [[TMP6_2:%.*]] = add i32 [[TMP5_2]], [[TMP2_2]]
; CHECK-NEXT: [[TMP6_3:%.*]] = add i32 [[TMP5_3]], [[TMP2_3]]
; CHECK-NEXT: [[TMP7_0:%.*]] = xor i32 [[TMP6_0]], [[TMP5_0]]
; CHECK-NEXT: [[TMP7_1:%.*]] = xor i32 [[TMP6_1]], [[TMP5_1]]
; CHECK-NEXT: [[TMP7_2:%.*]] = xor i32 [[TMP6_2]], [[TMP5_2]]
; CHECK-NEXT: [[TMP7_3:%.*]] = xor i32 [[TMP6_3]], [[TMP5_3]]
; CHECK-NEXT: [[REDUCE_0:%.*]] = add i32 [[TMP7_1]], [[TMP7_0]]
; CHECK-NEXT: [[REDUCE_1:%.*]] = add i32 [[REDUCE_0]], [[TMP7_2]]
; CHECK-NEXT: [[REDUCE_2:%.*]] = add i32 [[REDUCE_1]], [[TMP7_3]]
; CHECK-NEXT: ret i32 [[REDUCE_2]]
;		;
%v0.0 = extractelement <4 x i32> %v0, i32 0		%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1		%v0.1 = extractelement <4 x i32> %v0, i32 1
%v0.2 = extractelement <4 x i32> %v0, i32 2		%v0.2 = extractelement <4 x i32> %v0, i32 2
%v0.3 = extractelement <4 x i32> %v0, i32 3		%v0.3 = extractelement <4 x i32> %v0, i32 3
%v1.0 = extractelement <4 x i32> %v1, i32 0		%v1.0 = extractelement <4 x i32> %v1, i32 0
%v1.1 = extractelement <4 x i32> %v1, i32 1		%v1.1 = extractelement <4 x i32> %v1, i32 1
%v1.2 = extractelement <4 x i32> %v1, i32 2		%v1.2 = extractelement <4 x i32> %v1, i32 2
Show All 38 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[CostModel][AArch64] Add some initial costs for SK_Select and SK_PermuteSingleSrc
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 152443

llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/trunk/test/Analysis/CostModel/AArch64/shuffle-select.ll

llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CostModel][AArch64] Add some initial costs for SK_Select and SK_PermuteSingleSrc
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 152443

llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

llvm/trunk/test/Analysis/CostModel/AArch64/shuffle-select.ll

llvm/trunk/test/Transforms/SLPVectorizer/AArch64/transpose.ll

[CostModel][AArch64] Add some initial costs for SK_Select and SK_PermuteSingleSrc
ClosedPublic