This is an archive of the discontinued LLVM Phabricator instance.

[X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128-bit inputs
ClosedPublic

Authored by craig.topper on Aug 13 2019, 2:18 PM.

Download Raw Diff

Details

Reviewers

RKSimon
spatel

Commits

rG30d3e9c39520: [X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128…
rL368858: [X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128…

Summary

Now that we legalize by widening, the element types here won't change. Previously these were modeled as the elements being widened and then the instruction might become an AND or SHL/ASHR pair. But now they'll become something like a ZERO_EXTEND_VECTOR_INREG/SIGN_EXTEND_VECTOR_INREG.

For AVX2, when the destination type is legal, it's clear the cost should be 1, since we have extend instructions that can produce 256-bit vectors from less-than-128-bit vectors. I'm a little less sure about the AVX1 costs: the ones I changed were definitely too high, and the new values might still be too high.

Diff Detail

Repository: rL LLVM

Event Timeline

craig.topper created this revision. · Aug 13 2019, 2:18 PM

Herald added a project: Restricted Project. · View Herald Transcript · Aug 13 2019, 2:18 PM

Herald added a subscriber: hiraditya. · View Herald Transcript

LGTM - I agree the AVX1 256-bit result cases might need further tweaking (especially for extension ratio = 2 as we can cheaply use movx+unpckh), but that can wait for now.

llvm/test/Transforms/SLPVectorizer/X86/sext.ll
611 ↗	(On Diff #214918)	Please can you raise a bug about the lower v2i8 sext vectorizing but the upper 2 x i8 not?

This revision is now accepted and ready to land. · Aug 14 2019, 5:45 AM

Closed by commit rL368858: [X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128… (authored by ctopper). · Explain Why · Aug 14 2019, 7:51 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86TargetTransformInfo.cpp

22 lines

test/

Analysis/

CostModel/

X86/

cast.ll

30 lines

extend.ll

68 lines

min-legal-vector-width.ll

18 lines

Transforms/

SLPVectorizer/

X86/

cast.ll

43 lines

sext.ll

287 lines

zext.ll

140 lines

Diff 215125

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 1,308 Lines • ▼ Show 20 Lines	static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {

// v16i1 -> v16i32 - load + broadcast		// v16i1 -> v16i32 - load + broadcast
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },		{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },		{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },		{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },		{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },		{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },		{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },		{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
		{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },		{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
		{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },		{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },		{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },

{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },		{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },		{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },		{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },		{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },		{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
Show All 39 Lines	static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },		{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
};		};

static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {		static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },		{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },		{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },

{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },		{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },		{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },		{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },		{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },		{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },		{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 4 },

{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },		{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },		{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },

{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },		{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
};		};

static const TypeConversionCostTblEntry AVXConversionTbl[] = {		static const TypeConversionCostTblEntry AVXConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },		{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },		{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },		{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },		{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },		{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 4 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },		{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 4 },

{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },		{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 4 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },		{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
▲ Show 20 Lines • Show All 2,225 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/CostModel/X86/cast.ll

	Show First 20 Lines • Show All 133 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <8 x i1> %in to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %S = sext <8 x i1> %in to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %S = sext <8 x i1> %in to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = sext <8 x i16> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = sext <8 x i16> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = zext <8 x i16> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = zext <8 x i16> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <4 x i32> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <4 x i32> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = trunc <4 x i64> undef to <4 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %G = trunc <8 x i64> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'zext_sext'			; AVX2-LABEL: 'zext_sext'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %Z = zext <8 x i1> %in to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %S = sext <8 x i1> %in to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %S = sext <8 x i1> %in to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <4 x i64> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %G = trunc <8 x i64> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512-LABEL: 'zext_sext'			; AVX512-LABEL: 'zext_sext'
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %Z = zext <8 x i1> %in to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %Z = zext <8 x i1> %in to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %S = sext <8 x i1> %in to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %S = sext <8 x i1> %in to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = zext <16 x i8> undef to <16 x i16>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = sext <16 x i8> undef to <16 x i16>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i16> undef to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i16> undef to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <4 x i32> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <4 x i32> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = zext <8 x i32> undef to <8 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sext <8 x i32> undef to <8 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <4 x i64> undef to <4 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>			; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
	▲ Show 20 Lines • Show All 282 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/CostModel/X86/extend.ll

	Show First 20 Lines • Show All 98 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'zext_vXi16'			; AVX2-LABEL: 'zext_vXi16'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512-LABEL: 'zext_vXi16'			; AVX512-LABEL: 'zext_vXi16'
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i16> undef to <2 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i16> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i16> undef to <8 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i16> undef to <2 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i16> undef to <4 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i16> undef to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i16> undef to <16 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; BTVER2-LABEL: 'zext_vXi16'			; BTVER2-LABEL: 'zext_vXi16'
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'zext_vXi8'			; AVX2-LABEL: 'zext_vXi8'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'zext_vXi8'			; AVX512F-LABEL: 'zext_vXi8'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512BW-LABEL: 'zext_vXi8'			; AVX512BW-LABEL: 'zext_vXi8'
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i8> undef to <2 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = zext <4 x i8> undef to <4 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = zext <8 x i8> undef to <8 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i8> undef to <2 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i8> undef to <4 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = zext <8 x i8> undef to <8 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = zext <16 x i8> undef to <16 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i8> undef to <2 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	▲ Show 20 Lines • Show All 260 Lines • ▼ Show 20 Lines
	; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>			; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>			; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>			; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>			; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX1-LABEL: 'sext_vXi16'			; AVX1-LABEL: 'sext_vXi16'
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'sext_vXi16'			; AVX2-LABEL: 'sext_vXi16'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512-LABEL: 'sext_vXi16'			; AVX512-LABEL: 'sext_vXi16'
	; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; BTVER2-LABEL: 'sext_vXi16'			; BTVER2-LABEL: 'sext_vXi16'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i16> undef to <2 x i64>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i16> undef to <4 x i64>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i16> undef to <8 x i64>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i16> undef to <2 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i16> undef to <4 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i16> undef to <8 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i16> undef to <16 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	%V2i64 = sext <2 x i16> undef to <2 x i64>			%V2i64 = sext <2 x i16> undef to <2 x i64>
	%V4i64 = sext <4 x i16> undef to <4 x i64>			%V4i64 = sext <4 x i16> undef to <4 x i64>
	▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>			; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>			; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>			; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>			; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>
	; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX1-LABEL: 'sext_vXi8'			; AVX1-LABEL: 'sext_vXi8'
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'sext_vXi8'			; AVX2-LABEL: 'sext_vXi8'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'sext_vXi8'			; AVX512F-LABEL: 'sext_vXi8'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512BW-LABEL: 'sext_vXi8'			; AVX512BW-LABEL: 'sext_vXi8'
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; BTVER2-LABEL: 'sext_vXi8'			; BTVER2-LABEL: 'sext_vXi8'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i8> undef to <2 x i64>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i8> undef to <4 x i64>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = sext <8 x i8> undef to <8 x i64>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i8> undef to <2 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = sext <4 x i8> undef to <4 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i8> undef to <8 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i8> undef to <16 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i8> undef to <2 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	▲ Show 20 Lines • Show All 169 Lines • Show Last 20 Lines

llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,VEC256,AVX		; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,VEC256,AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512vl,+avx512bw,+avx512dq,+prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC256,SKX256		; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512vl,+avx512bw,+avx512dq,+prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC256,SKX256
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512vl,+avx512bw,+avx512dq,-prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC512		; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512vl,+avx512bw,+avx512dq,-prefer-256-bit | FileCheck %s --check-prefixes=CHECK,VEC512

define void @zext256() "min-legal-vector-width"="256" {		define void @zext256() "min-legal-vector-width"="256" {
; VEC256-LABEL: 'zext256'		; VEC256-LABEL: 'zext256'
; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = zext <8 x i16> undef to <8 x i64>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>		; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32>		; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; VEC512-LABEL: 'zext256'		; VEC512-LABEL: 'zext256'
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <8 x i16> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i32> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = zext <16 x i8> undef to <16 x i32>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <16 x i16> undef to <16 x i32>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = zext <32 x i8> undef to <32 x i16>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%A = zext <8 x i16> undef to <8 x i64>		%A = zext <8 x i16> undef to <8 x i64>
%B = zext <8 x i32> undef to <8 x i64>		%B = zext <8 x i32> undef to <8 x i64>
%C = zext <16 x i8> undef to <16 x i32>		%C = zext <16 x i8> undef to <16 x i32>
%D = zext <16 x i16> undef to <16 x i32>		%D = zext <16 x i16> undef to <16 x i32>
%E = zext <32 x i8> undef to <32 x i16>		%E = zext <32 x i8> undef to <32 x i16>
ret void		ret void
}		}

define void @zext512() "min-legal-vector-width"="512" {		define void @zext512() "min-legal-vector-width"="512" {
; AVX-LABEL: 'zext512'		; AVX-LABEL: 'zext512'
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = zext <8 x i16> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; SKX256-LABEL: 'zext512'		; SKX256-LABEL: 'zext512'
; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <8 x i16> undef to <8 x i64>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <8 x i16> undef to <8 x i64>
Show All 16 Lines	;
%C = zext <16 x i8> undef to <16 x i32>		%C = zext <16 x i8> undef to <16 x i32>
%D = zext <16 x i16> undef to <16 x i32>		%D = zext <16 x i16> undef to <16 x i32>
%E = zext <32 x i8> undef to <32 x i16>		%E = zext <32 x i8> undef to <32 x i16>
ret void		ret void
}		}

define void @sext256() "min-legal-vector-width"="256" {		define void @sext256() "min-legal-vector-width"="256" {
; VEC256-LABEL: 'sext256'		; VEC256-LABEL: 'sext256'
; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = sext <8 x i8> undef to <8 x i64>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %B = sext <8 x i16> undef to <8 x i64>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>		; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32>		; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>		; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; VEC512-LABEL: 'sext256'		; VEC512-LABEL: 'sext256'
; VEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <16 x i16> undef to <16 x i32>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = sext <32 x i8> undef to <32 x i16>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%A = sext <8 x i8> undef to <8 x i64>		%A = sext <8 x i8> undef to <8 x i64>
%B = sext <8 x i16> undef to <8 x i64>		%B = sext <8 x i16> undef to <8 x i64>
%C = sext <8 x i32> undef to <8 x i64>		%C = sext <8 x i32> undef to <8 x i64>
%D = sext <16 x i8> undef to <16 x i32>		%D = sext <16 x i8> undef to <16 x i32>
%E = sext <16 x i16> undef to <16 x i32>		%E = sext <16 x i16> undef to <16 x i32>
%F = sext <32 x i8> undef to <32 x i16>		%F = sext <32 x i8> undef to <32 x i16>
ret void		ret void
}		}

define void @sext512() "min-legal-vector-width"="512" {		define void @sext512() "min-legal-vector-width"="512" {
; AVX-LABEL: 'sext512'		; AVX-LABEL: 'sext512'
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A = sext <8 x i8> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %B = sext <8 x i16> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; SKX256-LABEL: 'sext512'		; SKX256-LABEL: 'sext512'
; SKX256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <16 x i16> undef to <16 x i32>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = sext <32 x i8> undef to <32 x i16>		; SKX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; VEC512-LABEL: 'sext512'		; VEC512-LABEL: 'sext512'
; VEC512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %A = sext <8 x i8> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <16 x i16> undef to <16 x i32>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = sext <32 x i8> undef to <32 x i16>		; VEC512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; VEC512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%A = sext <8 x i8> undef to <8 x i64>		%A = sext <8 x i8> undef to <8 x i64>
%B = sext <8 x i16> undef to <8 x i64>		%B = sext <8 x i16> undef to <8 x i64>
%C = sext <8 x i32> undef to <8 x i64>		%C = sext <8 x i32> undef to <8 x i64>
%D = sext <16 x i8> undef to <16 x i32>		%D = sext <16 x i8> undef to <16 x i32>
%E = sext <16 x i16> undef to <16 x i32>		%E = sext <16 x i16> undef to <16 x i32>
%F = sext <32 x i8> undef to <32 x i16>		%F = sext <32 x i8> undef to <32 x i16>
ret void		ret void
}		}

llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s		; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefixes=CHECK,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s		; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefixes=CHECK,AVX

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"		target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; int test_sext_4i8_to_4i32(int * restrict A, char * restrict B) {		; int test_sext_4i8_to_4i32(int * restrict A, char * restrict B) {
; A[0] = B[0];		; A[0] = B[0];
; A[1] = B[1];		; A[1] = B[1];
; A[2] = B[2];		; A[2] = B[2];
; A[3] = B[3];		; A[3] = B[3];
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	entry:
%3 = load i16, i16* %arrayidx8, align 1		%3 = load i16, i16* %arrayidx8, align 1
%conv9 = zext i16 %3 to i32		%conv9 = zext i16 %3 to i32
%arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3		%arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3
store i32 %conv9, i32* %arrayidx10, align 4		store i32 %conv9, i32* %arrayidx10, align 4
ret i32 undef		ret i32 undef
}		}

define i64 @test_sext_4i16_to_4i64(i64* noalias nocapture %A, i16* noalias nocapture %B) {		define i64 @test_sext_4i16_to_4i64(i64* noalias nocapture %A, i16* noalias nocapture %B) {
; CHECK-LABEL: @test_sext_4i16_to_4i64(		; SSE42-LABEL: @test_sext_4i16_to_4i64(
; CHECK-NEXT: entry:		; SSE42-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <2 x i16>*		; SSE42-NEXT: [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <2 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1		; SSE42-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64>		; SSE42-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*		; SSE42-NEXT: [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4		; SSE42-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2		; SSE42-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2		; SSE42-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>*		; SSE42-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1		; SSE42-NEXT: [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64>		; SSE42-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>*		; SSE42-NEXT: [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>*
; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4		; SSE42-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
; CHECK-NEXT: ret i64 undef		; SSE42-NEXT: ret i64 undef
		;
		; AVX-LABEL: @test_sext_4i16_to_4i64(
		; AVX-NEXT: entry:
		; AVX-NEXT: [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <4 x i16>*
		; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
		; AVX-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64>
		; AVX-NEXT: [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <4 x i64>*
		; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 4
		; AVX-NEXT: ret i64 undef
;		;
entry:		entry:
%0 = load i16, i16* %B, align 1		%0 = load i16, i16* %B, align 1
%conv = sext i16 %0 to i64		%conv = sext i16 %0 to i64
store i64 %conv, i64* %A, align 4		store i64 %conv, i64* %A, align 4
%arrayidx2 = getelementptr inbounds i16, i16* %B, i64 1		%arrayidx2 = getelementptr inbounds i16, i16* %B, i64 1
%1 = load i16, i16* %arrayidx2, align 1		%1 = load i16, i16* %arrayidx2, align 1
%conv3 = sext i16 %1 to i64		%conv3 = sext i16 %1 to i64
Show All 14 Lines

llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll

Show First 20 Lines • Show All 154 Lines • ▼ Show 20 Lines
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1		; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2		; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2		; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3		; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3		; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]		; SLM-NEXT: ret <4 x i64> [[V3]]
;		;
; AVX-LABEL: @loadext_4i8_to_4i64(		; AVX1-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1		; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2		; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3		; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*		; AVX1-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1		; AVX1-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1		; AVX1-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1
; AVX-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1		; AVX1-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>		; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64		; AVX1-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64
; AVX-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64		; AVX1-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0		; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0		; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1		; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3		; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX-NEXT: ret <4 x i64> [[V3]]		; AVX1-NEXT: ret <4 x i64> [[V3]]
		;
		; AVX2-LABEL: @loadext_4i8_to_4i64(
		; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
		; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
		; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
		; AVX2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
		; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
		; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
		; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
		; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
		; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
		; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
		; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
		; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
		; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
		; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
		; AVX2-NEXT: ret <4 x i64> [[V3]]
		;
		; AVX512-LABEL: @loadext_4i8_to_4i64(
		; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
		; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
		; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
		; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
		; AVX512-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
		; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
		; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
		; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
		; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
		; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
		; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
		; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
		; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
		; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
		; AVX512-NEXT: ret <4 x i64> [[V3]]
;		;
%p1 = getelementptr inbounds i8, i8* %p0, i64 1		%p1 = getelementptr inbounds i8, i8* %p0, i64 1
%p2 = getelementptr inbounds i8, i8* %p0, i64 2		%p2 = getelementptr inbounds i8, i8* %p0, i64 2
%p3 = getelementptr inbounds i8, i8* %p0, i64 3		%p3 = getelementptr inbounds i8, i8* %p0, i64 3
%i0 = load i8, i8* %p0, align 1		%i0 = load i8, i8* %p0, align 1
%i1 = load i8, i8* %p1, align 1		%i1 = load i8, i8* %p1, align 1
%i2 = load i8, i8* %p2, align 1		%i2 = load i8, i8* %p2, align 1
%i3 = load i8, i8* %p3, align 1		%i3 = load i8, i8* %p3, align 1
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	;
%v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4		%v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
%v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5		%v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
%v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6		%v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
%v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7		%v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
ret <8 x i16> %v7		ret <8 x i16> %v7
}		}

define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {		define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; SSE-LABEL: @loadext_8i8_to_8i32(		; CHECK-LABEL: @loadext_8i8_to_8i32(
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1		; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2		; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3		; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4		; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5		; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6		; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7		; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*		; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SSE-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1		; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>		; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0		; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; SSE-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0		; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1		; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; SSE-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1		; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2		; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; SSE-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2		; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3		; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; SSE-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3		; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4		; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; SSE-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4		; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; SSE-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5		; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; SSE-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5		; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; SSE-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6		; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; SSE-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6		; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; SSE-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7		; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; SSE-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7		; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; SSE-NEXT: ret <8 x i32> [[V7]]		; CHECK-NEXT: ret <8 x i32> [[V7]]
;
; AVX1-LABEL: @loadext_8i8_to_8i32(
; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX1-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX1-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX1-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX1-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX1-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX1-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX1-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX1-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1
; AVX1-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1
; AVX1-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1
; AVX1-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1
; AVX1-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; AVX1-NEXT: [[X4:%.*]] = sext i8 [[I4]] to i32
; AVX1-NEXT: [[X5:%.*]] = sext i8 [[I5]] to i32
; AVX1-NEXT: [[X6:%.*]] = sext i8 [[I6]] to i32
; AVX1-NEXT: [[X7:%.*]] = sext i8 [[I7]] to i32
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX1-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX1-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX1-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; AVX1-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX1-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; AVX1-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX1-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4
; AVX1-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5
; AVX1-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6
; AVX1-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7
; AVX1-NEXT: ret <8 x i32> [[V7]]
;
; AVX2-LABEL: @loadext_8i8_to_8i32(
; AVX2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; AVX2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; AVX2-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; AVX2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; AVX2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; AVX2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; AVX2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; AVX2-NEXT: ret <8 x i32> [[V7]]
;
; AVX512-LABEL: @loadext_8i8_to_8i32(
; AVX512-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX512-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX512-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX512-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX512-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX512-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX512-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX512-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; AVX512-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; AVX512-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; AVX512-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; AVX512-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; AVX512-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; AVX512-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; AVX512-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; AVX512-NEXT: ret <8 x i32> [[V7]]
;		;
%p1 = getelementptr inbounds i8, i8* %p0, i64 1		%p1 = getelementptr inbounds i8, i8* %p0, i64 1
%p2 = getelementptr inbounds i8, i8* %p0, i64 2		%p2 = getelementptr inbounds i8, i8* %p0, i64 2
%p3 = getelementptr inbounds i8, i8* %p0, i64 3		%p3 = getelementptr inbounds i8, i8* %p0, i64 3
%p4 = getelementptr inbounds i8, i8* %p0, i64 4		%p4 = getelementptr inbounds i8, i8* %p0, i64 4
%p5 = getelementptr inbounds i8, i8* %p0, i64 5		%p5 = getelementptr inbounds i8, i8* %p0, i64 5
%p6 = getelementptr inbounds i8, i8* %p0, i64 6		%p6 = getelementptr inbounds i8, i8* %p0, i64 6
%p7 = getelementptr inbounds i8, i8* %p0, i64 7		%p7 = getelementptr inbounds i8, i8* %p0, i64 7
▲ Show 20 Lines • Show All 258 Lines • ▼ Show 20 Lines
; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1		; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2		; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2		; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3		; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3		; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]		; SLM-NEXT: ret <4 x i64> [[V3]]
;		;
; AVX-LABEL: @loadext_4i16_to_4i64(		; AVX1-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1		; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2		; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3		; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <2 x i16>*		; AVX1-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.]] = load <2 x i16>, <2 x i16> [[TMP1]], align 1		; AVX1-NEXT: [[TMP2:%.]] = load <2 x i16>, <2 x i16> [[TMP1]], align 1
; AVX-NEXT: [[I2:%.]] = load i16, i16 [[P2]], align 1		; AVX1-NEXT: [[I2:%.]] = load i16, i16 [[P2]], align 1
; AVX-NEXT: [[I3:%.]] = load i16, i16 [[P3]], align 1		; AVX1-NEXT: [[I3:%.]] = load i16, i16 [[P3]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>		; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64		; AVX1-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64
; AVX-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64		; AVX1-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0		; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0		; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1		; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3		; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX-NEXT: ret <4 x i64> [[V3]]		; AVX1-NEXT: ret <4 x i64> [[V3]]
		;
		; AVX2-LABEL: @loadext_4i16_to_4i64(
		; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
		; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
		; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
		; AVX2-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
		; AVX2-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
		; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
		; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
		; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
		; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
		; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
		; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
		; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
		; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
		; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
		; AVX2-NEXT: ret <4 x i64> [[V3]]
		;
		; AVX512-LABEL: @loadext_4i16_to_4i64(
		; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
		; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
		; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
		; AVX512-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
		; AVX512-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
		; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
		; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
		; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
		; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
		; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
		; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
		; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
		; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
		; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
		; AVX512-NEXT: ret <4 x i64> [[V3]]
;		;
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
%i0 = load i16, i16* %p0, align 1		%i0 = load i16, i16* %p0, align 1
%i1 = load i16, i16* %p1, align 1		%i1 = load i16, i16* %p1, align 1
%i2 = load i16, i16* %p2, align 1		%i2 = load i16, i16* %p2, align 1
%i3 = load i16, i16* %p3, align 1		%i3 = load i16, i16* %p3, align 1
▲ Show 20 Lines • Show All 228 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll

	Show First 20 Lines • Show All 119 Lines • ▼ Show 20 Lines
	; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1			; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2			; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2			; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3			; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3			; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; SLM-NEXT: ret <4 x i64> [[V3]]			; SLM-NEXT: ret <4 x i64> [[V3]]
	;			;
	; AVX-LABEL: @loadext_4i8_to_4i64(			; AVX1-LABEL: @loadext_4i8_to_4i64(
	; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1			; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
	; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2			; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
	; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3			; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
	; AVX-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <2 x i8>*			; AVX1-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <2 x i8>*
	; AVX-NEXT: [[TMP2:%.]] = load <2 x i8>, <2 x i8> [[TMP1]], align 1			; AVX1-NEXT: [[TMP2:%.]] = load <2 x i8>, <2 x i8> [[TMP1]], align 1
	; AVX-NEXT: [[I2:%.]] = load i8, i8 [[P2]], align 1			; AVX1-NEXT: [[I2:%.]] = load i8, i8 [[P2]], align 1
	; AVX-NEXT: [[I3:%.]] = load i8, i8 [[P3]], align 1			; AVX1-NEXT: [[I3:%.]] = load i8, i8 [[P3]], align 1
	; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>			; AVX1-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
	; AVX-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64			; AVX1-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64
	; AVX-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64			; AVX1-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64
	; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0			; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
	; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0			; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1			; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
	; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
	; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3			; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; AVX-NEXT: ret <4 x i64> [[V3]]			; AVX1-NEXT: ret <4 x i64> [[V3]]
				;
				; AVX2-LABEL: @loadext_4i8_to_4i64(
				; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
				; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
				; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
				; AVX2-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
				; AVX2-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
				; AVX2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
				; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
				; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
				; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
				; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
				; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
				; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
				; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
				; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
				; AVX2-NEXT: ret <4 x i64> [[V3]]
				;
				; AVX512-LABEL: @loadext_4i8_to_4i64(
				; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
				; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
				; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
				; AVX512-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
				; AVX512-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
				; AVX512-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
				; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
				; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
				; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
				; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
				; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
				; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
				; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
				; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
				; AVX512-NEXT: ret <4 x i64> [[V3]]
	;			;
	%p1 = getelementptr inbounds i8, i8* %p0, i64 1			%p1 = getelementptr inbounds i8, i8* %p0, i64 1
	%p2 = getelementptr inbounds i8, i8* %p0, i64 2			%p2 = getelementptr inbounds i8, i8* %p0, i64 2
	%p3 = getelementptr inbounds i8, i8* %p0, i64 3			%p3 = getelementptr inbounds i8, i8* %p0, i64 3
	%i0 = load i8, i8* %p0, align 1			%i0 = load i8, i8* %p0, align 1
	%i1 = load i8, i8* %p1, align 1			%i1 = load i8, i8* %p1, align 1
	%i2 = load i8, i8* %p2, align 1			%i2 = load i8, i8* %p2, align 1
	%i3 = load i8, i8* %p3, align 1			%i3 = load i8, i8* %p3, align 1
	▲ Show 20 Lines • Show All 370 Lines • ▼ Show 20 Lines
	; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1			; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2			; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2			; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3			; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3			; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; SLM-NEXT: ret <4 x i64> [[V3]]			; SLM-NEXT: ret <4 x i64> [[V3]]
	;			;
	; AVX-LABEL: @loadext_4i16_to_4i64(			; AVX1-LABEL: @loadext_4i16_to_4i64(
	; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1			; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
	; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2			; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
	; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3			; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
	; AVX-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <2 x i16>*			; AVX1-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <2 x i16>*
	; AVX-NEXT: [[TMP2:%.]] = load <2 x i16>, <2 x i16> [[TMP1]], align 1			; AVX1-NEXT: [[TMP2:%.]] = load <2 x i16>, <2 x i16> [[TMP1]], align 1
	; AVX-NEXT: [[I2:%.]] = load i16, i16 [[P2]], align 1			; AVX1-NEXT: [[I2:%.]] = load i16, i16 [[P2]], align 1
	; AVX-NEXT: [[I3:%.]] = load i16, i16 [[P3]], align 1			; AVX1-NEXT: [[I3:%.]] = load i16, i16 [[P3]], align 1
	; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>			; AVX1-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
	; AVX-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64			; AVX1-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64
	; AVX-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64			; AVX1-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64
	; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0			; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
	; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0			; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1			; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
	; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
	; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3			; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; AVX-NEXT: ret <4 x i64> [[V3]]			; AVX1-NEXT: ret <4 x i64> [[V3]]
				;
				; AVX2-LABEL: @loadext_4i16_to_4i64(
				; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
				; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
				; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
				; AVX2-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
				; AVX2-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
				; AVX2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
				; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
				; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
				; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
				; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
				; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
				; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
				; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
				; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
				; AVX2-NEXT: ret <4 x i64> [[V3]]
				;
				; AVX512-LABEL: @loadext_4i16_to_4i64(
				; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
				; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
				; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
				; AVX512-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
				; AVX512-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
				; AVX512-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
				; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
				; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
				; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
				; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
				; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
				; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
				; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
				; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
				; AVX512-NEXT: ret <4 x i64> [[V3]]
	;			;
	%p1 = getelementptr inbounds i16, i16* %p0, i64 1			%p1 = getelementptr inbounds i16, i16* %p0, i64 1
	%p2 = getelementptr inbounds i16, i16* %p0, i64 2			%p2 = getelementptr inbounds i16, i16* %p0, i64 2
	%p3 = getelementptr inbounds i16, i16* %p0, i64 3			%p3 = getelementptr inbounds i16, i16* %p0, i64 3
	%i0 = load i16, i16* %p0, align 1			%i0 = load i16, i16* %p0, align 1
	%i1 = load i16, i16* %p1, align 1			%i1 = load i16, i16* %p1, align 1
	%i2 = load i16, i16* %p2, align 1			%i2 = load i16, i16* %p2, align 1
	%i3 = load i16, i16* %p3, align 1			%i3 = load i16, i16* %p3, align 1
	▲ Show 20 Lines • Show All 228 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128-bit inputs (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 215125

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/trunk/test/Analysis/CostModel/X86/cast.ll

llvm/trunk/test/Analysis/CostModel/X86/extend.ll

llvm/trunk/test/Analysis/CostModel/X86/min-legal-vector-width.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/cast.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/sext.ll

llvm/trunk/test/Transforms/SLPVectorizer/X86/zext.ll

[X86][CostModel] Adjust the costs of ZERO_EXTEND/SIGN_EXTEND with less than 128-bit inputs
ClosedPublic