Diff 245924

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 2,387 Lines • ▼ Show 20 Lines	int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
static const CostTblEntry SLMCostTbl[] = {		static const CostTblEntry SLMCostTbl[] = {
{ ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 },		{ ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 },
{ ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 },		{ ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 },
{ ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 },		{ ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 },
{ ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 }		{ ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 }
};		};

assert(Val->isVectorTy() && "This must be a vector type");		assert(Val->isVectorTy() && "This must be a vector type");

Type *ScalarType = Val->getScalarType();		Type *ScalarType = Val->getScalarType();
		int RegisterFileMoveCost = 0;

if (Index != -1U) {		if (Index != -1U && (Opcode == Instruction::ExtractElement \|\|
		Opcode == Instruction::InsertElement)) {
// Legalize the type.		// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);		std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);

// This type is legalized to a scalar type.		// This type is legalized to a scalar type.
if (!LT.second.isVector())		if (!LT.second.isVector())
return 0;		return 0;

// The type may be split. Normalize the index to the new type.		// The type may be split. Normalize the index to the new type.
		unsigned NumSubVecs = 1;
unsigned Width = LT.second.getVectorNumElements();		unsigned Width = LT.second.getVectorNumElements();
		unsigned SubWidth = Width;
Index = Index % Width;		Index = Index % Width;
		lebedev.riUnsubmitted Not Done Reply Inline Actions I don't really like how we are conflating vector element count and vector/element bit width, It took me a moment to notice that `Width` is actually element count.. lebedev.ri: I don't really like how we are conflating vector element count and vector/element bit width, It…

		// For >128-bit vectors, we need to extract higher 128-bit subvectors.
		// For INSERT_VECTOR_ELT, we also need to insert the subvector back.
		if (LT.second.getSizeInBits() > 128) {
		assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector");
		NumSubVecs = LT.second.getSizeInBits() / 128;
		SubWidth = Width / NumSubVecs;
		if (SubWidth <= Index) {
		RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
		Index %= SubWidth;
		}
		}

if (Index == 0) {		if (Index == 0) {
		RKSimonAuthorUnsubmitted Done Reply Inline Actions We need to make this extractelement only. RKSimon: We need to make this extractelement only.
// Floating point scalars are already located in index #0.		// Floating point scalars are already located in index #0.
if (ScalarType->isFloatingPointTy())		if (ScalarType->isFloatingPointTy())
return 0;		return RegisterFileMoveCost;

// Assume movd/movq XMM <-> GPR is relatively cheap on all targets.		// Assume movd/movq XMM <-> GPR is relatively cheap on all targets.
if (ScalarType->isIntegerTy())		if (ScalarType->isIntegerTy())
return 1;		return 1 + RegisterFileMoveCost;
}		}

int ISD = TLI->InstructionOpcodeToISD(Opcode);		int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Unexpected vector opcode");		assert(ISD && "Unexpected vector opcode");
MVT MScalarTy = LT.second.getScalarType();		MVT MScalarTy = LT.second.getScalarType();
if (ST->isSLM())		if (ST->isSLM())
if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))		if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
return LT.first * Entry->Cost;		return Entry->Cost + RegisterFileMoveCost;

		// Assume pinsr/pextr XMM <-> GPR is relatively cheap on all targets.
		if ((MScalarTy == MVT::i16 && ST->hasSSE2()) \|\|
		(MScalarTy.isInteger() && ST->hasSSE41()))
		return 1 + RegisterFileMoveCost;

		// For extractions we need to shuffle the element to index 0.
		lebedev.riUnsubmitted Not Done Reply Inline Actions insertps/extractps lebedev.ri: insertps/extractps
		// For insertions we need to shuffle the elements to its destination.
		// In both cases we must handle the subvectors move(s).
		// TODO: Under what circumstances should we shuffle using the full width?
		Type *SubVecTy = VectorType::get(Val->getVectorElementType(), SubWidth);
		int ShuffleCost = getShuffleCost(Opcode == Instruction::InsertElement
		? TTI::SK_PermuteTwoSrc
		: TTI::SK_PermuteSingleSrc,
		SubVecTy, 0, SubVecTy);
		int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1;
		return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
}		}

// Add to the base cost if we know that the extracted element of a vector is		// Add to the base cost if we know that the extracted element of a vector is
// destined to be moved to and used in the integer register file.		// destined to be moved to and used in the integer register file.
int RegisterFileMoveCost = 0;
if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())		if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
RegisterFileMoveCost = 1;		RegisterFileMoveCost += 1;

return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;		return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}		}

int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,		int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,		MaybeAlign Alignment, unsigned AddressSpace,
const Instruction *I) {		const Instruction *I) {
// Handle non-power-of-two vectors such as <3 x float>		// Handle non-power-of-two vectors such as <3 x float>
▲ Show 20 Lines • Show All 1,388 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/X86/arith-fp.ll

Show First 20 Lines • Show All 678 Lines • ▼ Show 20 Lines	;
%V8F64 = fdiv <8 x double> undef, undef		%V8F64 = fdiv <8 x double> undef, undef

ret i32 undef		ret i32 undef
}		}

define i32 @frem(i32 %arg) {		define i32 @frem(i32 %arg) {
; SSE1-LABEL: 'frem'		; SSE1-LABEL: 'frem'
; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = frem <4 x double> undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = frem <4 x double> undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = frem <8 x double> undef, undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = frem <8 x double> undef, undef
; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE2-LABEL: 'frem'		; SSE2-LABEL: 'frem'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'frem'		; SSE42-LABEL: 'frem'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'frem'		; AVX-LABEL: 'frem'
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = frem <8 x float> undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32 = frem <8 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = frem <16 x float> undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16F32 = frem <16 x float> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = frem <4 x double> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = frem <8 x double> undef, undef		; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = frem <8 x double> undef, undef
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'frem'		; AVX512-LABEL: 'frem'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = frem <8 x float> undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8F32 = frem <8 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16F32 = frem <16 x float> undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16F32 = frem <16 x float> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = frem <4 x double> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8F64 = frem <8 x double> undef, undef
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'frem'		; SLM-LABEL: 'frem'
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef		; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; GLM-LABEL: 'frem'		; GLM-LABEL: 'frem'
; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef		; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'frem'		; BTVER2-LABEL: 'frem'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = frem <4 x float> undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = frem <8 x float> undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8F32 = frem <8 x float> undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = frem <16 x float> undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16F32 = frem <16 x float> undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = frem <4 x double> undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = frem <8 x double> undef, undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = frem <8 x double> undef, undef
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%F32 = frem float undef, undef		%F32 = frem float undef, undef
%V4F32 = frem <4 x float> undef, undef		%V4F32 = frem <4 x float> undef, undef
%V8F32 = frem <8 x float> undef, undef		%V8F32 = frem <8 x float> undef, undef
%V16F32 = frem <16 x float> undef, undef		%V16F32 = frem <16 x float> undef, undef

%F64 = frem double undef, undef		%F64 = frem double undef, undef
▲ Show 20 Lines • Show All 319 Lines • ▼ Show 20 Lines	;
%V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)		%V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)

ret i32 undef		ret i32 undef
}		}

define i32 @fma(i32 %arg) {		define i32 @fma(i32 %arg) {
; SSE1-LABEL: 'fma'		; SSE1-LABEL: 'fma'
; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; SSE1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE2-LABEL: 'fma'		; SSE2-LABEL: 'fma'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'fma'		; SSE42-LABEL: 'fma'
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fma'		; AVX-LABEL: 'fma'
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'fma'		; AVX512-LABEL: 'fma'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'fma'		; SLM-LABEL: 'fma'
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; GLM-LABEL: 'fma'		; GLM-LABEL: 'fma'
; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'fma'		; BTVER2-LABEL: 'fma'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)		; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%F32 = call float @llvm.fma.f32(float undef, float undef, float undef)		%F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
%V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)		%V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
%V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)		%V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
%V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)		%V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)

%F64 = call double @llvm.fma.f64(double undef, double undef, double undef)		%F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/X86/extend.ll

	Show First 20 Lines • Show All 368 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'zext_vXi1'			; AVX2-LABEL: 'zext_vXi1'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'zext_vXi1'			; AVX512F-LABEL: 'zext_vXi1'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512BW-LABEL: 'zext_vXi1'			; AVX512BW-LABEL: 'zext_vXi1'
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
	Show All 18 Lines
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = zext i1 undef to i16
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	▲ Show 20 Lines • Show All 385 Lines • ▼ Show 20 Lines
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>			; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>			; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8			; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>			; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>
	; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX2-LABEL: 'sext_vXi1'			; AVX2-LABEL: 'sext_vXi1'
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>			; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>			; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>			; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8			; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>			; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>
	; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'sext_vXi1'			; AVX512F-LABEL: 'sext_vXi1'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512BW-LABEL: 'sext_vXi1'			; AVX512BW-LABEL: 'sext_vXi1'
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
	Show All 18 Lines
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	Show All 29 Lines

llvm/test/Analysis/CostModel/X86/fptosi.ll

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE2		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx \| FileCheck %s --check-prefixes=CHECK,AVX,AVX1		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx \| FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 \| FileCheck %s --check-prefixes=CHECK,AVX,AVX2		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 \| FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
;		;
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm \| FileCheck %s --check-prefixes=SLM		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm \| FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 \| FileCheck %s --check-prefixes=BTVER2		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 \| FileCheck %s --check-prefixes=BTVER2

define i32 @fptosi_double_i64(i32 %arg) {		define i32 @fptosi_double_i64(i32 %arg) {
; SSE-LABEL: 'fptosi_double_i64'		; SSE2-LABEL: 'fptosi_double_i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptosi_double_i64'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptosi_double_i64'		; AVX-LABEL: 'fptosi_double_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512F-LABEL: 'fptosi_double_i64'		; AVX512F-LABEL: 'fptosi_double_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512DQ-LABEL: 'fptosi_double_i64'		; AVX512DQ-LABEL: 'fptosi_double_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'fptosi_double_i64'		; SLM-LABEL: 'fptosi_double_i64'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'fptosi_double_i64'		; BTVER2-LABEL: 'fptosi_double_i64'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%I64 = fptosi double undef to i64		%I64 = fptosi double undef to i64
%V2I64 = fptosi <2 x double> undef to <2 x i64>		%V2I64 = fptosi <2 x double> undef to <2 x i64>
%V4I64 = fptosi <4 x double> undef to <4 x i64>		%V4I64 = fptosi <4 x double> undef to <4 x i64>
%V8I64 = fptosi <8 x double> undef to <8 x i64>		%V8I64 = fptosi <8 x double> undef to <8 x i64>
ret i32 undef		ret i32 undef
}		}
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	;
%I16 = fptosi double undef to i16		%I16 = fptosi double undef to i16
%V2I16 = fptosi <2 x double> undef to <2 x i16>		%V2I16 = fptosi <2 x double> undef to <2 x i16>
%V4I16 = fptosi <4 x double> undef to <4 x i16>		%V4I16 = fptosi <4 x double> undef to <4 x i16>
%V8I16 = fptosi <8 x double> undef to <8 x i16>		%V8I16 = fptosi <8 x double> undef to <8 x i16>
ret i32 undef		ret i32 undef
}		}

define i32 @fptosi_double_i8(i32 %arg) {		define i32 @fptosi_double_i8(i32 %arg) {
; SSE-LABEL: 'fptosi_double_i8'		; SSE2-LABEL: 'fptosi_double_i8'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptosi_double_i8'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptosi_double_i8'		; AVX-LABEL: 'fptosi_double_i8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
Show All 21 Lines	;
%I8 = fptosi double undef to i8		%I8 = fptosi double undef to i8
%V2I8 = fptosi <2 x double> undef to <2 x i8>		%V2I8 = fptosi <2 x double> undef to <2 x i8>
%V4I8 = fptosi <4 x double> undef to <4 x i8>		%V4I8 = fptosi <4 x double> undef to <4 x i8>
%V8I8 = fptosi <8 x double> undef to <8 x i8>		%V8I8 = fptosi <8 x double> undef to <8 x i8>
ret i32 undef		ret i32 undef
}		}

define i32 @fptosi_float_i64(i32 %arg) {		define i32 @fptosi_float_i64(i32 %arg) {
; SSE-LABEL: 'fptosi_float_i64'		; SSE2-LABEL: 'fptosi_float_i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptosi_float_i64'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptosi_float_i64'		; AVX-LABEL: 'fptosi_float_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512F-LABEL: 'fptosi_float_i64'		; AVX512F-LABEL: 'fptosi_float_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512DQ-LABEL: 'fptosi_float_i64'		; AVX512DQ-LABEL: 'fptosi_float_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'fptosi_float_i64'		; SLM-LABEL: 'fptosi_float_i64'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'fptosi_float_i64'		; BTVER2-LABEL: 'fptosi_float_i64'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%I64 = fptosi float undef to i64		%I64 = fptosi float undef to i64
%V2I64 = fptosi <2 x float> undef to <2 x i64>		%V2I64 = fptosi <2 x float> undef to <2 x i64>
%V4I64 = fptosi <4 x float> undef to <4 x i64>		%V4I64 = fptosi <4 x float> undef to <4 x i64>
%V8I64 = fptosi <8 x float> undef to <8 x i64>		%V8I64 = fptosi <8 x float> undef to <8 x i64>
%V16I64 = fptosi <16 x float> undef to <16 x i64>		%V16I64 = fptosi <16 x float> undef to <16 x i64>
ret i32 undef		ret i32 undef
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	;
%I16 = fptosi float undef to i16		%I16 = fptosi float undef to i16
%V4I16 = fptosi <4 x float> undef to <4 x i16>		%V4I16 = fptosi <4 x float> undef to <4 x i16>
%V8I16 = fptosi <8 x float> undef to <8 x i16>		%V8I16 = fptosi <8 x float> undef to <8 x i16>
%V16I16 = fptosi <16 x float> undef to <16 x i16>		%V16I16 = fptosi <16 x float> undef to <16 x i16>
ret i32 undef		ret i32 undef
}		}

define i32 @fptosi_float_i8(i32 %arg) {		define i32 @fptosi_float_i8(i32 %arg) {
; SSE-LABEL: 'fptosi_float_i8'		; SSE2-LABEL: 'fptosi_float_i8'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 327 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptosi_float_i8'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptosi_float_i8'		; AVX-LABEL: 'fptosi_float_i8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
Show All 27 Lines

llvm/test/Analysis/CostModel/X86/fptoui.ll

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE2		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+sse4.2 \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx \| FileCheck %s --check-prefixes=CHECK,AVX,AVX1		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx \| FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 \| FileCheck %s --check-prefixes=CHECK,AVX,AVX2		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx2 \| FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq \| FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
;		;
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm \| FileCheck %s --check-prefixes=SLM		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm \| FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 \| FileCheck %s --check-prefixes=BTVER2		; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 \| FileCheck %s --check-prefixes=BTVER2

define i32 @fptoui_double_i64(i32 %arg) {		define i32 @fptoui_double_i64(i32 %arg) {
; SSE-LABEL: 'fptoui_double_i64'		; SSE2-LABEL: 'fptoui_double_i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64		; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptoui_double_i64'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptoui_double_i64'		; AVX-LABEL: 'fptoui_double_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512F-LABEL: 'fptoui_double_i64'		; AVX512F-LABEL: 'fptoui_double_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512DQ-LABEL: 'fptoui_double_i64'		; AVX512DQ-LABEL: 'fptoui_double_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'fptoui_double_i64'		; SLM-LABEL: 'fptoui_double_i64'
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'fptoui_double_i64'		; BTVER2-LABEL: 'fptoui_double_i64'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64		; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%I64 = fptoui double undef to i64		%I64 = fptoui double undef to i64
%V2I64 = fptoui <2 x double> undef to <2 x i64>		%V2I64 = fptoui <2 x double> undef to <2 x i64>
%V4I64 = fptoui <4 x double> undef to <4 x i64>		%V4I64 = fptoui <4 x double> undef to <4 x i64>
%V8I64 = fptoui <8 x double> undef to <8 x i64>		%V8I64 = fptoui <8 x double> undef to <8 x i64>
ret i32 undef		ret i32 undef
}		}

define i32 @fptoui_double_i32(i32 %arg) {		define i32 @fptoui_double_i32(i32 %arg) {
; SSE-LABEL: 'fptoui_double_i32'		; SSE2-LABEL: 'fptoui_double_i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>		; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>		; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>		; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptoui_double_i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptoui_double_i32'		; AVX-LABEL: 'fptoui_double_i32'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	;
%I16 = fptoui double undef to i16		%I16 = fptoui double undef to i16
%V2I16 = fptoui <2 x double> undef to <2 x i16>		%V2I16 = fptoui <2 x double> undef to <2 x i16>
%V4I16 = fptoui <4 x double> undef to <4 x i16>		%V4I16 = fptoui <4 x double> undef to <4 x i16>
%V8I16 = fptoui <8 x double> undef to <8 x i16>		%V8I16 = fptoui <8 x double> undef to <8 x i16>
ret i32 undef		ret i32 undef
}		}

define i32 @fptoui_double_i8(i32 %arg) {		define i32 @fptoui_double_i8(i32 %arg) {
; SSE-LABEL: 'fptoui_double_i8'		; SSE2-LABEL: 'fptoui_double_i8'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptoui_double_i8'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptoui_double_i8'		; AVX-LABEL: 'fptoui_double_i8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
Show All 21 Lines	;
%I8 = fptoui double undef to i8		%I8 = fptoui double undef to i8
%V2I8 = fptoui <2 x double> undef to <2 x i8>		%V2I8 = fptoui <2 x double> undef to <2 x i8>
%V4I8 = fptoui <4 x double> undef to <4 x i8>		%V4I8 = fptoui <4 x double> undef to <4 x i8>
%V8I8 = fptoui <8 x double> undef to <8 x i8>		%V8I8 = fptoui <8 x double> undef to <8 x i8>
ret i32 undef		ret i32 undef
}		}

define i32 @fptoui_float_i64(i32 %arg) {		define i32 @fptoui_float_i64(i32 %arg) {
; SSE-LABEL: 'fptoui_float_i64'		; SSE2-LABEL: 'fptoui_float_i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64		; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptoui_float_i64'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptoui_float_i64'		; AVX-LABEL: 'fptoui_float_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>		; AVX-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512F-LABEL: 'fptoui_float_i64'		; AVX512F-LABEL: 'fptoui_float_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>		; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512DQ-LABEL: 'fptoui_float_i64'		; AVX512DQ-LABEL: 'fptoui_float_i64'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'fptoui_float_i64'		; SLM-LABEL: 'fptoui_float_i64'
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>		; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'fptoui_float_i64'		; BTVER2-LABEL: 'fptoui_float_i64'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64		; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>		; BTVER2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%I64 = fptoui float undef to i64		%I64 = fptoui float undef to i64
%V2I64 = fptoui <2 x float> undef to <2 x i64>		%V2I64 = fptoui <2 x float> undef to <2 x i64>
%V4I64 = fptoui <4 x float> undef to <4 x i64>		%V4I64 = fptoui <4 x float> undef to <4 x i64>
%V8I64 = fptoui <8 x float> undef to <8 x i64>		%V8I64 = fptoui <8 x float> undef to <8 x i64>
%V16I64 = fptoui <16 x float> undef to <16 x i64>		%V16I64 = fptoui <16 x float> undef to <16 x i64>
ret i32 undef		ret i32 undef
}		}

define i32 @fptoui_float_i32(i32 %arg) {		define i32 @fptoui_float_i32(i32 %arg) {
; SSE-LABEL: 'fptoui_float_i32'		; SSE2-LABEL: 'fptoui_float_i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>		; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>		; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptoui_float_i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptoui_float_i32'		; AVX-LABEL: 'fptoui_float_i32'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>		; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines	;
%I16 = fptoui float undef to i16		%I16 = fptoui float undef to i16
%V4I16 = fptoui <4 x float> undef to <4 x i16>		%V4I16 = fptoui <4 x float> undef to <4 x i16>
%V8I16 = fptoui <8 x float> undef to <8 x i16>		%V8I16 = fptoui <8 x float> undef to <8 x i16>
%V16I16 = fptoui <16 x float> undef to <16 x i16>		%V16I16 = fptoui <16 x float> undef to <16 x i16>
ret i32 undef		ret i32 undef
}		}

define i32 @fptoui_float_i8(i32 %arg) {		define i32 @fptoui_float_i8(i32 %arg) {
; SSE-LABEL: 'fptoui_float_i8'		; SSE2-LABEL: 'fptoui_float_i8'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>		; SSE2-NEXT: Cost Model: Found an estimated cost of 327 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'fptoui_float_i8'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'fptoui_float_i8'		; AVX-LABEL: 'fptoui_float_i8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>		; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
Show All 27 Lines

llvm/test/Analysis/CostModel/X86/fround.ll

Show All 10 Lines
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 \| FileCheck %s --check-prefixes=BTVER2		; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 \| FileCheck %s --check-prefixes=BTVER2

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"		target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"		target triple = "x86_64-apple-macosx10.8.0"

define i32 @ceil(i32 %arg) {		define i32 @ceil(i32 %arg) {
; SSE2-LABEL: 'ceil'		; SSE2-LABEL: 'ceil'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'ceil'		; SSE42-LABEL: 'ceil'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	;
%V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)		%V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)

ret i32 undef		ret i32 undef
}		}

define i32 @floor(i32 %arg) {		define i32 @floor(i32 %arg) {
; SSE2-LABEL: 'floor'		; SSE2-LABEL: 'floor'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'floor'		; SSE42-LABEL: 'floor'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	;
%V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)		%V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)

ret i32 undef		ret i32 undef
}		}

define i32 @nearbyint(i32 %arg) {		define i32 @nearbyint(i32 %arg) {
; SSE2-LABEL: 'nearbyint'		; SSE2-LABEL: 'nearbyint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'nearbyint'		; SSE42-LABEL: 'nearbyint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	;
%V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)		%V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)

ret i32 undef		ret i32 undef
}		}

define i32 @rint(i32 %arg) {		define i32 @rint(i32 %arg) {
; SSE2-LABEL: 'rint'		; SSE2-LABEL: 'rint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'rint'		; SSE42-LABEL: 'rint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	;
%V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)		%V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)

ret i32 undef		ret i32 undef
}		}

define i32 @trunc(i32 %arg) {		define i32 @trunc(i32 %arg) {
; SSE2-LABEL: 'trunc'		; SSE2-LABEL: 'trunc'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE42-LABEL: 'trunc'		; SSE42-LABEL: 'trunc'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
▲ Show 20 Lines • Show All 126 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll

				; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx \| FileCheck %s			; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx \| FileCheck %s

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
	target triple = "x86_64-apple-macosx10.8.0"			target triple = "x86_64-apple-macosx10.8.0"

	define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {			define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) {
	;CHECK: cost of 0 {{.*}} extract			; CHECK-LABEL: 'insert-extract-at-zero-idx'
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %A = extractelement <4 x float> undef, i32 0
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = extractelement <4 x i32> undef, i32 0
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C = extractelement <4 x float> undef, i32 1
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = extractelement <8 x float> undef, i32 0
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = extractelement <8 x float> undef, i32 1
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F = extractelement <8 x float> undef, i32 %arg
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = insertelement <4 x float> undef, float %fl, i32 0
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %H = insertelement <4 x float> undef, float %fl, i32 1
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I = insertelement <4 x i32> undef, i32 %arg, i32 0
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %J = insertelement <4 x double> undef, double undef, i32 0
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %K = insertelement <8 x double> undef, double undef, i32 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %L = insertelement <16 x double> undef, double undef, i32 8
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %M = insertelement <16 x double> undef, double undef, i32 9
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
				;
	%A = extractelement <4 x float> undef, i32 0			%A = extractelement <4 x float> undef, i32 0
	;CHECK: cost of 1 {{.*}} extract
	%B = extractelement <4 x i32> undef, i32 0			%B = extractelement <4 x i32> undef, i32 0
	;CHECK: cost of 1 {{.*}} extract
	%C = extractelement <4 x float> undef, i32 1			%C = extractelement <4 x float> undef, i32 1

	;CHECK: cost of 0 {{.*}} extract
	%D = extractelement <8 x float> undef, i32 0			%D = extractelement <8 x float> undef, i32 0
	;CHECK: cost of 1 {{.*}} extract
	%E = extractelement <8 x float> undef, i32 1			%E = extractelement <8 x float> undef, i32 1

	;CHECK: cost of 1 {{.*}} extract
	%F = extractelement <8 x float> undef, i32 %arg			%F = extractelement <8 x float> undef, i32 %arg

	;CHECK: cost of 0 {{.*}} insert
	%G = insertelement <4 x float> undef, float %fl, i32 0			%G = insertelement <4 x float> undef, float %fl, i32 0
	;CHECK: cost of 1 {{.*}} insert
	%H = insertelement <4 x float> undef, float %fl, i32 1			%H = insertelement <4 x float> undef, float %fl, i32 1
	;CHECK: cost of 1 {{.*}} insert
	%I = insertelement <4 x i32> undef, i32 %arg, i32 0			%I = insertelement <4 x i32> undef, i32 %arg, i32 0

	;CHECK: cost of 0 {{.*}} insert
	%J = insertelement <4 x double> undef, double undef, i32 0			%J = insertelement <4 x double> undef, double undef, i32 0

	;CHECK: cost of 0 {{.*}} insert
	%K = insertelement <8 x double> undef, double undef, i32 4			%K = insertelement <8 x double> undef, double undef, i32 4
	;CHECK: cost of 0 {{.*}} insert
	%L = insertelement <16 x double> undef, double undef, i32 8			%L = insertelement <16 x double> undef, double undef, i32 8
	;CHECK: cost of 1 {{.*}} insert
	%M = insertelement <16 x double> undef, double undef, i32 9			%M = insertelement <16 x double> undef, double undef, i32 9
	ret i32 0			ret i32 0
	}			}

llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll

				; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
	; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s \| FileCheck %s -check-prefix=CORE2			; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s \| FileCheck %s -check-prefix=CORE2
	; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=corei7 -cost-model -analyze < %s \| FileCheck %s -check-prefix=COREI7			; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=corei7 -cost-model -analyze < %s \| FileCheck %s -check-prefix=COREI7

	; If SSE4.1 roundps instruction is available it is cheap to lower, otherwise			; If SSE4.1 roundps instruction is available it is cheap to lower, otherwise
	; it'll be scalarized into calls which are expensive.			; it'll be scalarized into calls which are expensive.
	define void @test1(float* nocapture %f) nounwind {			define void @test1(float* nocapture %f) nounwind {
				; CORE2-LABEL: 'test1'
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %vector.body
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = getelementptr inbounds float, float* %f, i64 %index
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = bitcast float* %0 to <4 x float>*
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %wide.load = load <4 x float>, <4 x float>* %1, align 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %2, <4 x float>* %1, align 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %index.next = add i64 %index, 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %index.next, 1024
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %3, label %for.end, label %vector.body
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
				;
				; COREI7-LABEL: 'test1'
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %vector.body
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = getelementptr inbounds float, float* %f, i64 %index
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = bitcast float* %0 to <4 x float>*
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %wide.load = load <4 x float>, <4 x float>* %1, align 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %2, <4 x float>* %1, align 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %index.next = add i64 %index, 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %index.next, 1024
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %3, label %for.end, label %vector.body
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
				;
	vector.ph:			vector.ph:
	br label %vector.body			br label %vector.body

	vector.body: ; preds = %vector.body, %vector.ph			vector.body: ; preds = %vector.body, %vector.ph
	%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]			%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
	%0 = getelementptr inbounds float, float* %f, i64 %index			%0 = getelementptr inbounds float, float* %f, i64 %index
	%1 = bitcast float* %0 to <4 x float>*			%1 = bitcast float* %0 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %1, align 4			%wide.load = load <4 x float>, <4 x float>* %1, align 4
	%2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)			%2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
	store <4 x float> %2, <4 x float>* %1, align 4			store <4 x float> %2, <4 x float>* %1, align 4
	%index.next = add i64 %index, 4			%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024			%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body			br i1 %3, label %for.end, label %vector.body

	for.end: ; preds = %vector.body			for.end: ; preds = %vector.body
	ret void			ret void

	; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
	; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)

	; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
	; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)

	}			}

	declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone			declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone

	define void @test2(float* nocapture %f) nounwind {			define void @test2(float* nocapture %f) nounwind {
				; CORE2-LABEL: 'test2'
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %vector.body
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = getelementptr inbounds float, float* %f, i64 %index
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = bitcast float* %0 to <4 x float>*
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %wide.load = load <4 x float>, <4 x float>* %1, align 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %2, <4 x float>* %1, align 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %index.next = add i64 %index, 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %index.next, 1024
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %3, label %for.end, label %vector.body
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
				;
				; COREI7-LABEL: 'test2'
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %vector.body
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = getelementptr inbounds float, float* %f, i64 %index
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = bitcast float* %0 to <4 x float>*
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %wide.load = load <4 x float>, <4 x float>* %1, align 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %2, <4 x float>* %1, align 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %index.next = add i64 %index, 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %index.next, 1024
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %3, label %for.end, label %vector.body
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
				;
	vector.ph:			vector.ph:
	br label %vector.body			br label %vector.body

	vector.body: ; preds = %vector.body, %vector.ph			vector.body: ; preds = %vector.body, %vector.ph
	%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]			%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
	%0 = getelementptr inbounds float, float* %f, i64 %index			%0 = getelementptr inbounds float, float* %f, i64 %index
	%1 = bitcast float* %0 to <4 x float>*			%1 = bitcast float* %0 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %1, align 4			%wide.load = load <4 x float>, <4 x float>* %1, align 4
	%2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)			%2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
	store <4 x float> %2, <4 x float>* %1, align 4			store <4 x float> %2, <4 x float>* %1, align 4
	%index.next = add i64 %index, 4			%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024			%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body			br i1 %3, label %for.end, label %vector.body

	for.end: ; preds = %vector.body			for.end: ; preds = %vector.body
	ret void			ret void

	; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
	; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)

	; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
	; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)

	}			}

	declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone			declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone

	define void @test3(float* nocapture %f, <4 x float> %b, <4 x float> %c) nounwind {			define void @test3(float* nocapture %f, <4 x float> %b, <4 x float> %c) nounwind {
				; CORE2-LABEL: 'test3'
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %vector.body
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = getelementptr inbounds float, float* %f, i64 %index
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = bitcast float* %0 to <4 x float>*
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %wide.load = load <4 x float>, <4 x float>* %1, align 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %2, <4 x float>* %1, align 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %index.next = add i64 %index, 4
				; CORE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %index.next, 1024
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %3, label %for.end, label %vector.body
				; CORE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
				;
				; COREI7-LABEL: 'test3'
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %vector.body
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = getelementptr inbounds float, float* %f, i64 %index
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = bitcast float* %0 to <4 x float>*
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %wide.load = load <4 x float>, <4 x float>* %1, align 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %2, <4 x float>* %1, align 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %index.next = add i64 %index, 4
				; COREI7-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %index.next, 1024
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %3, label %for.end, label %vector.body
				; COREI7-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
				;
	vector.ph:			vector.ph:
	br label %vector.body			br label %vector.body

	vector.body: ; preds = %vector.body, %vector.ph			vector.body: ; preds = %vector.body, %vector.ph
	%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]			%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
	%0 = getelementptr inbounds float, float* %f, i64 %index			%0 = getelementptr inbounds float, float* %f, i64 %index
	%1 = bitcast float* %0 to <4 x float>*			%1 = bitcast float* %0 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %1, align 4			%wide.load = load <4 x float>, <4 x float>* %1, align 4
	%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)			%2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
	store <4 x float> %2, <4 x float>* %1, align 4			store <4 x float> %2, <4 x float>* %1, align 4
	%index.next = add i64 %index, 4			%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024			%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body			br i1 %3, label %for.end, label %vector.body

	for.end: ; preds = %vector.body			for.end: ; preds = %vector.body
	ret void			ret void

	; CORE2: Printing analysis 'Cost Model Analysis' for function 'test3':
	; CORE2: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)

	; COREI7: Printing analysis 'Cost Model Analysis' for function 'test3':
	; COREI7: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)

	}			}

	declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone			declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

llvm/test/Analysis/CostModel/X86/load_store.ll

				; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
	; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx \| FileCheck %s			; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx \| FileCheck %s

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
	target triple = "x86_64-apple-macosx10.8.0"			target triple = "x86_64-apple-macosx10.8.0"

	define i32 @stores(i32 %arg) {			define i32 @stores(i32 %arg) {
				; CHECK-LABEL: 'stores'
	;CHECK: cost of 1 {{.*}} store			; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				;
	store i8 undef, i8* undef, align 4			store i8 undef, i8* undef, align 4
	;CHECK: cost of 1 {{.*}} store
	store i16 undef, i16* undef, align 4			store i16 undef, i16* undef, align 4
	;CHECK: cost of 1 {{.*}} store
	store i32 undef, i32* undef, align 4			store i32 undef, i32* undef, align 4
	;CHECK: cost of 1 {{.*}} store
	store i64 undef, i64* undef, align 4			store i64 undef, i64* undef, align 4
	;CHECK: cost of 2 {{.*}} store
	store i128 undef, i128* undef, align 4			store i128 undef, i128* undef, align 4

	;CHECK: cost of 1 {{.*}} store
	store <4 x i16> undef, <4 x i16>* undef, align 4			store <4 x i16> undef, <4 x i16>* undef, align 4
	;CHECK: cost of 1 {{.*}} store
	store <4 x i32> undef, <4 x i32>* undef, align 4			store <4 x i32> undef, <4 x i32>* undef, align 4
	;CHECK: cost of 2 {{.*}} store
	store <4 x i64> undef, <4 x i64>* undef, align 4			store <4 x i64> undef, <4 x i64>* undef, align 4

	;CHECK: cost of 1 {{.*}} store
	store <8 x i16> undef, <8 x i16>* undef, align 4			store <8 x i16> undef, <8 x i16>* undef, align 4
	;CHECK: cost of 2 {{.*}} store
	store <8 x i32> undef, <8 x i32>* undef, align 4			store <8 x i32> undef, <8 x i32>* undef, align 4
	;CHECK: cost of 4 {{.*}} store
	store <8 x i64> undef, <8 x i64>* undef, align 4			store <8 x i64> undef, <8 x i64>* undef, align 4

	ret i32 undef			ret i32 undef
	}			}
	define i32 @loads(i32 %arg) {			define i32 @loads(i32 %arg) {
	;CHECK: cost of 1 {{.*}} load			; CHECK-LABEL: 'loads'
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <2 x i32>, <2 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <4 x i32>, <4 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <8 x i32>, <8 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i64>, <2 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <4 x i64>, <4 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <8 x i64>, <8 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %12 = load <3 x float>, <3 x float>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x double>, <3 x double>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %14 = load <3 x i32>, <3 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <3 x i64>, <3 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %16 = load <5 x i32>, <5 x i32>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %17 = load <5 x i64>, <5 x i64>* undef, align 4
				; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
				;
	load i8, i8* undef, align 4			load i8, i8* undef, align 4
	;CHECK: cost of 1 {{.*}} load
	load i16, i16* undef, align 4			load i16, i16* undef, align 4
	;CHECK: cost of 1 {{.*}} load
	load i32, i32* undef, align 4			load i32, i32* undef, align 4
	;CHECK: cost of 1 {{.*}} load
	load i64, i64* undef, align 4			load i64, i64* undef, align 4
	;CHECK: cost of 2 {{.*}} load
	load i128, i128* undef, align 4			load i128, i128* undef, align 4

	;CHECK: cost of 1 {{.*}} load
	load <2 x i32>, <2 x i32>* undef, align 4			load <2 x i32>, <2 x i32>* undef, align 4
	;CHECK: cost of 1 {{.*}} load
	load <4 x i32>, <4 x i32>* undef, align 4			load <4 x i32>, <4 x i32>* undef, align 4
	;CHECK: cost of 2 {{.*}} load
	load <8 x i32>, <8 x i32>* undef, align 4			load <8 x i32>, <8 x i32>* undef, align 4


	;CHECK: cost of 1 {{.*}} load
	load <2 x i64>, <2 x i64>* undef, align 4			load <2 x i64>, <2 x i64>* undef, align 4
	;CHECK: cost of 2 {{.*}} load
	load <4 x i64>, <4 x i64>* undef, align 4			load <4 x i64>, <4 x i64>* undef, align 4
	;CHECK: cost of 4 {{.*}} load
	load <8 x i64>, <8 x i64>* undef, align 4			load <8 x i64>, <8 x i64>* undef, align 4


	;CHECK: cost of 3 {{.*}} load
	load <3 x float>, <3 x float>* undef, align 4			load <3 x float>, <3 x float>* undef, align 4

	;CHECK: cost of 3 {{.*}} load
	load <3 x double>, <3 x double>* undef, align 4			load <3 x double>, <3 x double>* undef, align 4

	;CHECK: cost of 3 {{.*}} load
	load <3 x i32>, <3 x i32>* undef, align 4			load <3 x i32>, <3 x i32>* undef, align 4

	;CHECK: cost of 3 {{.*}} load
	load <3 x i64>, <3 x i64>* undef, align 4			load <3 x i64>, <3 x i64>* undef, align 4

	;CHECK: cost of 10 {{.*}} load
	load <5 x i32>, <5 x i32>* undef, align 4			load <5 x i32>, <5 x i32>* undef, align 4

	;CHECK: cost of 10 {{.*}} load
	load <5 x i64>, <5 x i64>* undef, align 4			load <5 x i64>, <5 x i64>* undef, align 4

	ret i32 undef			ret i32 undef
	}			}

llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,SSE,SSE2		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX,AVX1		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX,AVX2		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX,AVX2
;		;
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX,SKL		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX,SKL
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX512,KNL		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX512,KNL
; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX512,SKX		; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze \| FileCheck %s --check-prefixes=CHECK,AVX512,SKX

define i32 @masked_load() {		define i32 @masked_load() {
; SSE-LABEL: 'masked_load'		; SSE2-LABEL: 'masked_load'
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 428 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1636 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 818 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 409 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 193 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SSE42-LABEL: 'masked_load'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX-LABEL: 'masked_load'		; AVX-LABEL: 'masked_load'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; KNL-LABEL: 'masked_load'		; KNL-LABEL: 'masked_load'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; SKX-LABEL: 'masked_load'		; SKX-LABEL: 'masked_load'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	;
%V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)		%V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
%V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)		%V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
%V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)		%V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)

ret i32 0		ret i32 0
}		}

define i32 @masked_store() {		define i32 @masked_store() {
; SSE-LABEL: 'masked_store'		; SSE2-LABEL: 'masked_store'
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 226 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 428 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1456 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 728 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SSE42-LABEL: 'masked_store'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX-LABEL: 'masked_store'		; AVX-LABEL: 'masked_store'
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; KNL-LABEL: 'masked_store'		; KNL-LABEL: 'masked_store'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; SKX-LABEL: 'masked_store'		; SKX-LABEL: 'masked_store'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	;
call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)		call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)		call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)		call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)

ret i32 0		ret i32 0
}		}

define i32 @masked_gather() {		define i32 @masked_gather() {
; SSE-LABEL: 'masked_gather'		; SSE2-LABEL: 'masked_gather'
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 908 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 454 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 227 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SSE42-LABEL: 'masked_gather'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX1-LABEL: 'masked_gather'		; AVX1-LABEL: 'masked_gather'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX2-LABEL: 'masked_gather'		; AVX2-LABEL: 'masked_gather'
; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; SKL-LABEL: 'masked_gather'		; SKL-LABEL: 'masked_gather'
; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; KNL-LABEL: 'masked_gather'		; KNL-LABEL: 'masked_gather'
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; SKX-LABEL: 'masked_gather'		; SKX-LABEL: 'masked_gather'
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
%V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)		%V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
%V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)		%V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
%V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)		%V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
%V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)		%V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
Show All 22 Lines	;
%V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)		%V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
%V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)		%V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
%V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)		%V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)

ret i32 0		ret i32 0
}		}

define i32 @masked_scatter() {		define i32 @masked_scatter() {
; SSE-LABEL: 'masked_scatter'		; SSE2-LABEL: 'masked_scatter'
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 728 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 182 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SSE42-LABEL: 'masked_scatter'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX-LABEL: 'masked_scatter'		; AVX-LABEL: 'masked_scatter'
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; KNL-LABEL: 'masked_scatter'		; KNL-LABEL: 'masked_scatter'
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; SKX-LABEL: 'masked_scatter'		; SKX-LABEL: 'masked_scatter'
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)		; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)		call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)		call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)		call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)		call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
Show All 22 Lines	;
call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)		call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)		call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)		call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)

ret i32 0		ret i32 0
}		}

define i32 @masked_expandload() {		define i32 @masked_expandload() {
; SSE-LABEL: 'masked_expandload'		; SSE2-LABEL: 'masked_expandload'
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 908 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 454 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 227 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SSE42-LABEL: 'masked_expandload'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX-LABEL: 'masked_expandload'		; AVX-LABEL: 'masked_expandload'
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX512-LABEL: 'masked_expandload'		; KNL-LABEL: 'masked_expandload'
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SKX-LABEL: 'masked_expandload'
		; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
%V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)		%V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
%V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)		%V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
%V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)		%V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
%V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)		%V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)

%V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)		%V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
%V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)		%V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
Show All 19 Lines	;
%V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)		%V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
%V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)		%V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
%V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)		%V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)

ret i32 0		ret i32 0
}		}

define i32 @masked_compressstore() {		define i32 @masked_compressstore() {
; SSE-LABEL: 'masked_compressstore'		; SSE2-LABEL: 'masked_compressstore'
; SSE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 193 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 209 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 395 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 1391 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 695 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SSE42-LABEL: 'masked_compressstore'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 191 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX-LABEL: 'masked_compressstore'		; AVX-LABEL: 'masked_compressstore'
; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 127 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 191 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 255 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 127 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)		; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
; AVX512-LABEL: 'masked_compressstore'		; KNL-LABEL: 'masked_compressstore'
; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 191 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 223 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)		; KNL-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
		;
		; SKX-LABEL: 'masked_compressstore'
		; SKX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 239 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
		; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;		;
call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)		call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)		call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)		call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)		call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)

call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)		call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)		call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
Show All 21 Lines	;
call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)		call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)

ret i32 0		ret i32 0
}		}

define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {		define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
; SSE2-LABEL: 'test1'		; SSE2-LABEL: 'test1'
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; SSE42-LABEL: 'test1'		; SSE42-LABEL: 'test1'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; AVX-LABEL: 'test1'		; AVX-LABEL: 'test1'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; AVX512-LABEL: 'test1'		; AVX512-LABEL: 'test1'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
%mask = icmp eq <2 x i64> %trigger, zeroinitializer		%mask = icmp eq <2 x i64> %trigger, zeroinitializer
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)		%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
ret <2 x double> %res		ret <2 x double> %res
}		}

define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {		define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
; SSE-LABEL: 'test2'		; SSE2-LABEL: 'test2'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)		; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
		;
		; SSE42-LABEL: 'test2'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
; AVX-LABEL: 'test2'		; AVX-LABEL: 'test2'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
; AVX512-LABEL: 'test2'		; AVX512-LABEL: 'test2'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
%mask = icmp eq <4 x i32> %trigger, zeroinitializer		%mask = icmp eq <4 x i32> %trigger, zeroinitializer
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)		%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
ret <4 x i32> %res		ret <4 x i32> %res
}		}

define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {		define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; SSE-LABEL: 'test3'		; SSE2-LABEL: 'test3'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)		; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		;
		; SSE42-LABEL: 'test3'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX-LABEL: 'test3'		; AVX-LABEL: 'test3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)		; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX512-LABEL: 'test3'		; AVX512-LABEL: 'test3'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%mask = icmp eq <4 x i32> %trigger, zeroinitializer		%mask = icmp eq <4 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)		call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
ret void		ret void
}		}

define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {		define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
; SSE-LABEL: 'test4'		; SSE2-LABEL: 'test4'
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)		; SSE2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
		;
		; SSE42-LABEL: 'test4'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;		;
; AVX1-LABEL: 'test4'		; AVX1-LABEL: 'test4'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer		; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)		; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;		;
; AVX2-LABEL: 'test4'		; AVX2-LABEL: 'test4'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
Show All 11 Lines
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;		;
%mask = icmp eq <8 x i32> %trigger, zeroinitializer		%mask = icmp eq <8 x i32> %trigger, zeroinitializer
%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)		%res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
ret <8 x float> %res		ret <8 x float> %res
}		}

define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {		define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; SSE-LABEL: 'test5'		; SSE2-LABEL: 'test5'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)		; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		;
		; SSE42-LABEL: 'test5'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX-LABEL: 'test5'		; AVX-LABEL: 'test5'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX512-LABEL: 'test5'		; AVX512-LABEL: 'test5'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)		; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer		%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)		call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
ret void		ret void
}		}

define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {		define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
; SSE-LABEL: 'test6'		; SSE2-LABEL: 'test6'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)		; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		;
		; SSE42-LABEL: 'test6'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX-LABEL: 'test6'		; AVX-LABEL: 'test6'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)		; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX512-LABEL: 'test6'		; AVX512-LABEL: 'test6'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)		; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer		%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)		call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
ret void		ret void
}		}

define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {		define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
; SSE-LABEL: 'test7'		; SSE2-LABEL: 'test7'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)		; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
		;
		; SSE42-LABEL: 'test7'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;		;
; AVX-LABEL: 'test7'		; AVX-LABEL: 'test7'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;		;
; AVX512-LABEL: 'test7'		; AVX512-LABEL: 'test7'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)		; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;		;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer		%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)		%res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
ret <2 x float> %res		ret <2 x float> %res
}		}

define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {		define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; SSE-LABEL: 'test8'		; SSE2-LABEL: 'test8'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)		; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
		;
		; SSE42-LABEL: 'test8'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;		;
; AVX-LABEL: 'test8'		; AVX-LABEL: 'test8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)		; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;		;
; AVX512-LABEL: 'test8'		; AVX512-LABEL: 'test8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)		; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;		;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer		%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)		%res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
ret <2 x i32> %res		ret <2 x i32> %res
}		}

define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {		define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
; SSE-LABEL: 'test_gather_2f64'		; SSE2-LABEL: 'test_gather_2f64'
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)		; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
		;
		; SSE42-LABEL: 'test_gather_2f64'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; AVX1-LABEL: 'test_gather_2f64'		; AVX1-LABEL: 'test_gather_2f64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)		; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; AVX2-LABEL: 'test_gather_2f64'		; AVX2-LABEL: 'test_gather_2f64'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)		; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; SKL-LABEL: 'test_gather_2f64'		; SKL-LABEL: 'test_gather_2f64'
; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)		; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
; AVX512-LABEL: 'test_gather_2f64'		; AVX512-LABEL: 'test_gather_2f64'
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)		; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;		;
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)		%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
ret <2 x double> %res		ret <2 x double> %res
}		}

define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {		define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
; SSE-LABEL: 'test_gather_4i32'		; SSE2-LABEL: 'test_gather_4i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)		; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
		;
		; SSE42-LABEL: 'test_gather_4i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
; AVX1-LABEL: 'test_gather_4i32'		; AVX1-LABEL: 'test_gather_4i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)		; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
; AVX2-LABEL: 'test_gather_4i32'		; AVX2-LABEL: 'test_gather_4i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)		; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
Show All 10 Lines
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)		%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
ret <4 x i32> %res		ret <4 x i32> %res
}		}

define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {		define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {
; SSE-LABEL: 'test_gather_4i32_const_mask'		; SSE2-LABEL: 'test_gather_4i32_const_mask'
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
		;
		; SSE42-LABEL: 'test_gather_4i32_const_mask'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
; AVX1-LABEL: 'test_gather_4i32_const_mask'		; AVX1-LABEL: 'test_gather_4i32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)		; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;		;
; AVX2-LABEL: 'test_gather_4i32_const_mask'		; AVX2-LABEL: 'test_gather_4i32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)		; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
Show All 13 Lines	;
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)		%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
ret <4 x i32> %res		ret <4 x i32> %res
}		}

define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {		define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
; SSE2-LABEL: 'test_gather_16f32_const_mask'		; SSE2-LABEL: 'test_gather_16f32_const_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SSE42-LABEL: 'test_gather_16f32_const_mask'		; SSE42-LABEL: 'test_gather_16f32_const_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX1-LABEL: 'test_gather_16f32_const_mask'		; AVX1-LABEL: 'test_gather_16f32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX2-LABEL: 'test_gather_16f32_const_mask'		; AVX2-LABEL: 'test_gather_16f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SKL-LABEL: 'test_gather_16f32_const_mask'		; SKL-LABEL: 'test_gather_16f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
Show All 9 Lines	;
%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res		ret <16 x float>%res
}		}

define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {		define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
; SSE2-LABEL: 'test_gather_16f32_var_mask'		; SSE2-LABEL: 'test_gather_16f32_var_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SSE42-LABEL: 'test_gather_16f32_var_mask'		; SSE42-LABEL: 'test_gather_16f32_var_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX1-LABEL: 'test_gather_16f32_var_mask'		; AVX1-LABEL: 'test_gather_16f32_var_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX2-LABEL: 'test_gather_16f32_var_mask'		; AVX2-LABEL: 'test_gather_16f32_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SKL-LABEL: 'test_gather_16f32_var_mask'		; SKL-LABEL: 'test_gather_16f32_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
Show All 9 Lines	;
%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
ret <16 x float>%res		ret <16 x float>%res
}		}

define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {		define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'		; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'		; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'		; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'		; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SKL-LABEL: 'test_gather_16f32_ra_var_mask'		; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX512-LABEL: 'test_gather_16f32_ra_var_mask'		; AVX512-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
%sext_ind = sext <16 x i32> %ind to <16 x i64>		%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind		%gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind

%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)		%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
ret <16 x float>%res		ret <16 x float>%res
}		}

define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {		define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
; SSE2-LABEL: 'test_gather_16f32_const_mask2'		; SSE2-LABEL: 'test_gather_16f32_const_mask2'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SSE42-LABEL: 'test_gather_16f32_const_mask2'		; SSE42-LABEL: 'test_gather_16f32_const_mask2'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'		; AVX1-LABEL: 'test_gather_16f32_const_mask2'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0		; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer		; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'		; AVX2-LABEL: 'test_gather_16f32_const_mask2'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;		;
; SKL-LABEL: 'test_gather_16f32_const_mask2'		; SKL-LABEL: 'test_gather_16f32_const_mask2'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float> undef, float %base, i32 0
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float> %broadcast.splatinsert, <16 x float> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>		; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
Show All 13 Lines	;
%sext_ind = sext <16 x i32> %ind to <16 x i64>		%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind		%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)		%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res		ret <16 x float>%res
}		}

define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {		define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; SSE-LABEL: 'test_scatter_16i32'		; SSE2-LABEL: 'test_scatter_16i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)		; SSE2-NEXT: Cost Model: Found an estimated cost of 226 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		;
		; SSE42-LABEL: 'test_scatter_16i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
		; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX1-LABEL: 'test_scatter_16i32'		; AVX1-LABEL: 'test_scatter_16i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0		; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer		; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>		; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)		; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX2-LABEL: 'test_scatter_16i32'		; AVX2-LABEL: 'test_scatter_16i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)		; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; SKL-LABEL: 'test_scatter_16i32'		; SKL-LABEL: 'test_scatter_16i32'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; SKL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)		; SKL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX512-LABEL: 'test_scatter_16i32'		; AVX512-LABEL: 'test_scatter_16i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)		; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
%broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0		%broadcast.splatinsert = insertelement <16 x i32> undef, i32 %base, i32 0
%broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer		%broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer

%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind		%gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
%imask = bitcast i16 %mask to <16 x i1>		%imask = bitcast i16 %mask to <16 x i1>
call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)		call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
ret void		ret void
}		}

define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {		define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
; SSE-LABEL: 'test_scatter_8i32'		; SSE2-LABEL: 'test_scatter_8i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)		; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		;
		; SSE42-LABEL: 'test_scatter_8i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX-LABEL: 'test_scatter_8i32'		; AVX-LABEL: 'test_scatter_8i32'
; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)		; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX512-LABEL: 'test_scatter_8i32'		; AVX512-LABEL: 'test_scatter_8i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)		; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)		call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
ret void		ret void
}		}

define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {		define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
; SSE-LABEL: 'test_scatter_4i32'		; SSE2-LABEL: 'test_scatter_4i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)		; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
		;
		; SSE42-LABEL: 'test_scatter_4i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; AVX-LABEL: 'test_scatter_4i32'		; AVX-LABEL: 'test_scatter_4i32'
; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)		; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; KNL-LABEL: 'test_scatter_4i32'		; KNL-LABEL: 'test_scatter_4i32'
; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)		; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
; SKX-LABEL: 'test_scatter_4i32'		; SKX-LABEL: 'test_scatter_4i32'
; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)		; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void		; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;		;
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)		call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
ret void		ret void
}		}

define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {		define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
; SSE2-LABEL: 'test_gather_4f32'		; SSE2-LABEL: 'test_gather_4f32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; SSE42-LABEL: 'test_gather_4f32'		; SSE42-LABEL: 'test_gather_4f32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; AVX1-LABEL: 'test_gather_4f32'		; AVX1-LABEL: 'test_gather_4f32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; AVX2-LABEL: 'test_gather_4f32'		; AVX2-LABEL: 'test_gather_4f32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; SKL-LABEL: 'test_gather_4f32'		; SKL-LABEL: 'test_gather_4f32'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
Show All 15 Lines	;
%res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)		%res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
ret <4 x float>%res		ret <4 x float>%res
}		}

define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {		define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
; SSE2-LABEL: 'test_gather_4f32_const_mask'		; SSE2-LABEL: 'test_gather_4f32_const_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)		; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; SSE42-LABEL: 'test_gather_4f32_const_mask'		; SSE42-LABEL: 'test_gather_4f32_const_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)		; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; AVX1-LABEL: 'test_gather_4f32_const_mask'		; AVX1-LABEL: 'test_gather_4f32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)		; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; AVX2-LABEL: 'test_gather_4f32_const_mask'		; AVX2-LABEL: 'test_gather_4f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)		; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
; SKL-LABEL: 'test_gather_4f32_const_mask'		; SKL-LABEL: 'test_gather_4f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>		; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)		; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res		; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;		;
▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show All 32 Lines
	; AVX-LABEL: 'test_vXf64'			; AVX-LABEL: 'test_vXf64'
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>			; AVX-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; AVX512-LABEL: 'test_vXf64'			; AVX512-LABEL: 'test_vXf64'
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; BTVER2-LABEL: 'test_vXf64'			; BTVER2-LABEL: 'test_vXf64'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> <i32 5, i32 6, i32 7, i32 undef>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	%V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>			%V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 0, i32 1>
	%V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>			%V256_23 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> <i32 2, i32 3>
	%V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>			%V512_01 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 0, i32 1>
	%V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>			%V512_23 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> <i32 2, i32 3>
	Show All 22 Lines
	; AVX-LABEL: 'test_vXi64'			; AVX-LABEL: 'test_vXi64'
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; AVX512-LABEL: 'test_vXi64'			; AVX512-LABEL: 'test_vXi64'
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; BTVER2-LABEL: 'test_vXi64'			; BTVER2-LABEL: 'test_vXi64'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	%V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>			%V256_01 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
	%V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>			%V256_23 = shufflevector <4 x i64> %src256, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
	%V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>			%V512_01 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
	%V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>			%V512_23 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
	%V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>			%V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
	▲ Show 20 Lines • Show All 264 Lines • ▼ Show 20 Lines
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	Show All 18 Lines
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	Show All 18 Lines
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	Show All 16 Lines
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 2, i32 3>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 2, i32 3>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 4, i32 5>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 4, i32 5>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 8, i32 9>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 10, i32 11>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 0, i32 1>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 8, i32 9>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 10, i32 11>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 12, i32 13>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 14, i32 15>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 16, i32 17>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 18, i32 19>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 20, i32 21>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 22, i32 23>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 24, i32 25>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 26, i32 27>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>			; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 2, i32 3>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 4, i32 5>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 6, i32 7>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 8, i32 9>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 10, i32 11>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 12, i32 13>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> <i32 14, i32 15>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 0, i32 1>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 2, i32 3>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 4, i32 5>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 6, i32 7>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 8, i32 9>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 10, i32 11>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 12, i32 13>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 14, i32 15>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 16, i32 17>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	▲ Show 20 Lines • Show All 457 Lines • ▼ Show 20 Lines
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 18, i32 19>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 20, i32 21>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 22, i32 23>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 24, i32 25>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 26, i32 27>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
	; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>			; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>			; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
	; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	▲ Show 20 Lines • Show All 424 Lines • Show Last 20 Lines

llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll

	Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; AVX512-LABEL: 'test_vXf64'			; AVX512-LABEL: 'test_vXf64'
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; BTVER2-LABEL: 'test_vXf64'			; BTVER2-LABEL: 'test_vXf64'
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; AVX512-LABEL: 'test_vXi64'			; AVX512-LABEL: 'test_vXi64'
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>			; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>			; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>			; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
	; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void			; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
	;			;
	; BTVER2-LABEL: 'test_vXi64'			; BTVER2-LABEL: 'test_vXi64'
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
	; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>			; BTVER2-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
	Show All 23 Lines

llvm/test/Analysis/CostModel/X86/sitofp.ll

	Show First 20 Lines • Show All 131 Lines • ▼ Show 20 Lines
	; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>			; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
	; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>			; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'sitofp_i64_double'			; AVX512F-LABEL: 'sitofp_i64_double'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512DQ-LABEL: 'sitofp_i64_double'			; AVX512DQ-LABEL: 'sitofp_i64_double'
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines
	; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX-LABEL: 'sitofp_i64_float'			; AVX-LABEL: 'sitofp_i64_float'
	; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float			; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
	; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'sitofp_i64_float'			; AVX512F-LABEL: 'sitofp_i64_float'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512DQ-LABEL: 'sitofp_i64_float'			; AVX512DQ-LABEL: 'sitofp_i64_float'
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; BTVER2-LABEL: 'sitofp_i64_float'			; BTVER2-LABEL: 'sitofp_i64_float'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float			; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	%cvt_i64_f32 = sitofp i64 undef to float			%cvt_i64_f32 = sitofp i64 undef to float
	%cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>			%cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
	%cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>			%cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
	%cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>			%cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
	%cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>			%cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
	ret i32 undef			ret i32 undef
	}			}

llvm/test/Analysis/CostModel/X86/uitofp.ll

	Show First 20 Lines • Show All 281 Lines • ▼ Show 20 Lines
	; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>			; SSE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
	; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX-LABEL: 'uitofp_i64_float'			; AVX-LABEL: 'uitofp_i64_float'
	; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float			; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
	; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>			; AVX-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
	; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512F-LABEL: 'uitofp_i64_float'			; AVX512F-LABEL: 'uitofp_i64_float'
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float			; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>			; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
	; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; AVX512DQ-LABEL: 'uitofp_i64_float'			; AVX512DQ-LABEL: 'uitofp_i64_float'
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
	; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	; BTVER2-LABEL: 'uitofp_i64_float'			; BTVER2-LABEL: 'uitofp_i64_float'
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float			; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f32 = uitofp i64 undef to float
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>			; BTVER2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
	; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef			; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
	;			;
	%cvt_i64_f32 = uitofp i64 undef to float			%cvt_i64_f32 = uitofp i64 undef to float
	%cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>			%cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
	%cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>			%cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
	%cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>			%cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
	%cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>			%cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
	ret i32 undef			ret i32 undef
	}			}

llvm/test/Analysis/CostModel/X86/vector-extract.ll

Show All 28 Lines
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'extract_double'		; AVX-LABEL: 'extract_double'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4
		lebedev.riUnsubmitted Not Done Reply Inline Actions Hm, why is this cost=0, don't we need to extract 3'th sub-register here? lebedev.ri: Hm, why is this cost=0, don't we need to extract 3'th sub-register here?
		RKSimonAuthorUnsubmitted Done Reply Inline Actions AVX1/2 - this will legalize to 2 * <4 x double> to fit into ymm registers, so its already at index0 of the 2nd <4 x double> RKSimon: AVX1/2 - this will legalize to 2 * <4 x double> to fit into ymm registers, so its already at…
		lebedev.riUnsubmitted Done Reply Inline Actions Right. lebedev.ri: Right.
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'extract_double'		; AVX512-LABEL: 'extract_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'extract_double'		; BTVER2-LABEL: 'extract_double'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = extractelement <2 x double> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = extractelement <4 x double> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = extractelement <4 x double> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_3 = extractelement <4 x double> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = extractelement <8 x double> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = extractelement <8 x double> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_3 = extractelement <8 x double> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = extractelement <8 x double> undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_7 = extractelement <8 x double> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2f64_a = extractelement <2 x double> undef, i32 %arg		%v2f64_a = extractelement <2 x double> undef, i32 %arg
%v2f64_0 = extractelement <2 x double> undef, i32 0		%v2f64_0 = extractelement <2 x double> undef, i32 0
%v2f64_1 = extractelement <2 x double> undef, i32 1		%v2f64_1 = extractelement <2 x double> undef, i32 1

%v4f64_a = extractelement <4 x double> undef, i32 %arg		%v4f64_a = extractelement <4 x double> undef, i32 %arg
%v4f64_0 = extractelement <4 x double> undef, i32 0		%v4f64_0 = extractelement <4 x double> undef, i32 0
Show All 34 Lines
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'extract_float'		; AVX512-LABEL: 'extract_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'extract_float'		; BTVER2-LABEL: 'extract_float'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = extractelement <2 x float> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = extractelement <4 x float> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = extractelement <4 x float> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = extractelement <4 x float> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = extractelement <8 x float> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = extractelement <8 x float> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = extractelement <8 x float> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = extractelement <8 x float> undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = extractelement <8 x float> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = extractelement <16 x float> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = extractelement <16 x float> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = extractelement <16 x float> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = extractelement <16 x float> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = extractelement <16 x float> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2f32_a = extractelement <2 x float> undef, i32 %arg		%v2f32_a = extractelement <2 x float> undef, i32 %arg
%v2f32_0 = extractelement <2 x float> undef, i32 0		%v2f32_0 = extractelement <2 x float> undef, i32 0
%v2f32_1 = extractelement <2 x float> undef, i32 1		%v2f32_1 = extractelement <2 x float> undef, i32 1

%v4f32_a = extractelement <4 x float> undef, i32 %arg		%v4f32_a = extractelement <4 x float> undef, i32 %arg
%v4f32_0 = extractelement <4 x float> undef, i32 0		%v4f32_0 = extractelement <4 x float> undef, i32 0
Show All 13 Lines	;

ret i32 undef		ret i32 undef
}		}

define i32 @extract_i64(i32 %arg) {		define i32 @extract_i64(i32 %arg) {
; SSE2-LABEL: 'extract_i64'		; SSE2-LABEL: 'extract_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE3-LABEL: 'extract_i64'		; SSE3-LABEL: 'extract_i64'
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSSE3-LABEL: 'extract_i64'		; SSSE3-LABEL: 'extract_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE41-LABEL: 'extract_i64'		; SSE41-LABEL: 'extract_i64'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'extract_i64'		; AVX-LABEL: 'extract_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'extract_i64'		; AVX512-LABEL: 'extract_i64'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'extract_i64'		; SLM-LABEL: 'extract_i64'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; GLM-LABEL: 'extract_i64'		; GLM-LABEL: 'extract_i64'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'extract_i64'		; BTVER2-LABEL: 'extract_i64'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2i64_a = extractelement <2 x i64> undef, i32 %arg		%v2i64_a = extractelement <2 x i64> undef, i32 %arg
%v2i64_0 = extractelement <2 x i64> undef, i32 0		%v2i64_0 = extractelement <2 x i64> undef, i32 0
%v2i64_1 = extractelement <2 x i64> undef, i32 1		%v2i64_1 = extractelement <2 x i64> undef, i32 1

%v4i64_a = extractelement <4 x i64> undef, i32 %arg		%v4i64_a = extractelement <4 x i64> undef, i32 %arg
%v4i64_0 = extractelement <4 x i64> undef, i32 0		%v4i64_0 = extractelement <4 x i64> undef, i32 0
%v4i64_3 = extractelement <4 x i64> undef, i32 3		%v4i64_3 = extractelement <4 x i64> undef, i32 3

%v8i64_a = extractelement <8 x i64> undef, i32 %arg		%v8i64_a = extractelement <8 x i64> undef, i32 %arg
%v8i64_0 = extractelement <8 x i64> undef, i32 0		%v8i64_0 = extractelement <8 x i64> undef, i32 0
%v8i64_3 = extractelement <8 x i64> undef, i32 3		%v8i64_3 = extractelement <8 x i64> undef, i32 3
%v8i64_4 = extractelement <8 x i64> undef, i32 4		%v8i64_4 = extractelement <8 x i64> undef, i32 4
%v8i64_7 = extractelement <8 x i64> undef, i32 7		%v8i64_7 = extractelement <8 x i64> undef, i32 7

ret i32 undef		ret i32 undef
}		}

define i32 @extract_i32(i32 %arg) {		define i32 @extract_i32(i32 %arg) {
; SSE2-LABEL: 'extract_i32'		; SSE2-LABEL: 'extract_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE3-LABEL: 'extract_i32'		; SSE3-LABEL: 'extract_i32'
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSSE3-LABEL: 'extract_i32'		; SSSE3-LABEL: 'extract_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE41-LABEL: 'extract_i32'		; SSE41-LABEL: 'extract_i32'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
Show All 15 Lines
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'extract_i32'		; AVX512-LABEL: 'extract_i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'extract_i32'		; SLM-LABEL: 'extract_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; GLM-LABEL: 'extract_i32'		; GLM-LABEL: 'extract_i32'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
Show All 15 Lines
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2i32_a = extractelement <2 x i32> undef, i32 %arg		%v2i32_a = extractelement <2 x i32> undef, i32 %arg
%v2i32_0 = extractelement <2 x i32> undef, i32 0		%v2i32_0 = extractelement <2 x i32> undef, i32 0
%v2i32_1 = extractelement <2 x i32> undef, i32 1		%v2i32_1 = extractelement <2 x i32> undef, i32 1

%v4i32_a = extractelement <4 x i32> undef, i32 %arg		%v4i32_a = extractelement <4 x i32> undef, i32 %arg
%v4i32_0 = extractelement <4 x i32> undef, i32 0		%v4i32_0 = extractelement <4 x i32> undef, i32 0
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines
;		;
; AVX-LABEL: 'extract_i16'		; AVX-LABEL: 'extract_i16'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'extract_i16'		; AVX512F-LABEL: 'extract_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512BW-LABEL: 'extract_i16'
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'extract_i16'		; SLM-LABEL: 'extract_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; GLM-LABEL: 'extract_i16'		; GLM-LABEL: 'extract_i16'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
Show All 12 Lines
;		;
; BTVER2-LABEL: 'extract_i16'		; BTVER2-LABEL: 'extract_i16'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v8i16_a = extractelement <8 x i16> undef, i32 %arg		%v8i16_a = extractelement <8 x i16> undef, i32 %arg
%v8i16_0 = extractelement <8 x i16> undef, i32 0		%v8i16_0 = extractelement <8 x i16> undef, i32 0
%v8i16_7 = extractelement <8 x i16> undef, i32 7		%v8i16_7 = extractelement <8 x i16> undef, i32 7

%v16i16_a = extractelement <16 x i16> undef, i32 %arg		%v16i16_a = extractelement <16 x i16> undef, i32 %arg
%v16i16_0 = extractelement <16 x i16> undef, i32 0		%v16i16_0 = extractelement <16 x i16> undef, i32 0
Show All 12 Lines	;

ret i32 undef		ret i32 undef
}		}

define i32 @extract_i8(i32 %arg) {		define i32 @extract_i8(i32 %arg) {
; SSE2-LABEL: 'extract_i8'		; SSE2-LABEL: 'extract_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE3-LABEL: 'extract_i8'		; SSE3-LABEL: 'extract_i8'
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; SSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSSE3-LABEL: 'extract_i8'		; SSSE3-LABEL: 'extract_i8'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SSE41-LABEL: 'extract_i8'		; SSE41-LABEL: 'extract_i8'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
Show All 20 Lines
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'extract_i8'		; AVX512F-LABEL: 'extract_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512BW-LABEL: 'extract_i8'
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; SLM-LABEL: 'extract_i8'		; SLM-LABEL: 'extract_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; GLM-LABEL: 'extract_i8'		; GLM-LABEL: 'extract_i8'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
Show All 20 Lines
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v16i8_a = extractelement <16 x i8> undef, i32 %arg		%v16i8_a = extractelement <16 x i8> undef, i32 %arg
%v16i8_0 = extractelement <16 x i8> undef, i32 0		%v16i8_0 = extractelement <16 x i8> undef, i32 0
%v16i8_8 = extractelement <16 x i8> undef, i32 8		%v16i8_8 = extractelement <16 x i8> undef, i32 8
%v16i8_15 = extractelement <16 x i8> undef, i32 15		%v16i8_15 = extractelement <16 x i8> undef, i32 15

%v32i8_a = extractelement <32 x i8> undef, i32 %arg		%v32i8_a = extractelement <32 x i8> undef, i32 %arg
Show All 20 Lines

llvm/test/Analysis/CostModel/X86/vector-insert.ll

Show All 28 Lines
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'insert_double'		; AVX-LABEL: 'insert_double'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'insert_double'		; AVX512-LABEL: 'insert_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'insert_double'		; BTVER2-LABEL: 'insert_double'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> undef, double undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> undef, double undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> undef, double undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg		%v2f64_a = insertelement <2 x double> undef, double undef, i32 %arg
%v2f64_0 = insertelement <2 x double> undef, double undef, i32 0		%v2f64_0 = insertelement <2 x double> undef, double undef, i32 0
%v2f64_1 = insertelement <2 x double> undef, double undef, i32 1		%v2f64_1 = insertelement <2 x double> undef, double undef, i32 1

%v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg		%v4f64_a = insertelement <4 x double> undef, double undef, i32 %arg
%v4f64_0 = insertelement <4 x double> undef, double undef, i32 0		%v4f64_0 = insertelement <4 x double> undef, double undef, i32 0
%v4f64_3 = insertelement <4 x double> undef, double undef, i32 3		%v4f64_3 = insertelement <4 x double> undef, double undef, i32 3

%v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg		%v8f64_a = insertelement <8 x double> undef, double undef, i32 %arg
%v8f64_0 = insertelement <8 x double> undef, double undef, i32 0		%v8f64_0 = insertelement <8 x double> undef, double undef, i32 0
%v8f64_3 = insertelement <8 x double> undef, double undef, i32 3		%v8f64_3 = insertelement <8 x double> undef, double undef, i32 3
%v8f64_4 = insertelement <8 x double> undef, double undef, i32 4		%v8f64_4 = insertelement <8 x double> undef, double undef, i32 4
%v8f64_7 = insertelement <8 x double> undef, double undef, i32 7		%v8f64_7 = insertelement <8 x double> undef, double undef, i32 7

ret i32 undef		ret i32 undef
}		}

define i32 @insert_float(i32 %arg) {		define i32 @insert_float(i32 %arg) {
; SSE-LABEL: 'insert_float'		; SSE-LABEL: 'insert_float'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1		; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3		; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3		; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7		; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3		; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15		; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX-LABEL: 'insert_float'		; AVX-LABEL: 'insert_float'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3		; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15		; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; AVX512-LABEL: 'insert_float'		; AVX512-LABEL: 'insert_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8		; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'insert_float'		; BTVER2-LABEL: 'insert_float'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> undef, float undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> undef, float undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_a = insertelement <8 x float> undef, float undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> undef, float undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> undef, float undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> undef, float undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32_7 = insertelement <8 x float> undef, float undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_a = insertelement <16 x float> undef, float undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> undef, float undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32_15 = insertelement <16 x float> undef, float undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg		%v2f32_a = insertelement <2 x float> undef, float undef, i32 %arg
%v2f32_0 = insertelement <2 x float> undef, float undef, i32 0		%v2f32_0 = insertelement <2 x float> undef, float undef, i32 0
%v2f32_1 = insertelement <2 x float> undef, float undef, i32 1		%v2f32_1 = insertelement <2 x float> undef, float undef, i32 1

%v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg		%v4f32_a = insertelement <4 x float> undef, float undef, i32 %arg
%v4f32_0 = insertelement <4 x float> undef, float undef, i32 0		%v4f32_0 = insertelement <4 x float> undef, float undef, i32 0
Show All 10 Lines	;
%v16f32_3 = insertelement <16 x float> undef, float undef, i32 3		%v16f32_3 = insertelement <16 x float> undef, float undef, i32 3
%v16f32_8 = insertelement <16 x float> undef, float undef, i32 8		%v16f32_8 = insertelement <16 x float> undef, float undef, i32 8
%v16f32_15 = insertelement <16 x float> undef, float undef, i32 15		%v16f32_15 = insertelement <16 x float> undef, float undef, i32 15

ret i32 undef		ret i32 undef
}		}

define i32 @insert_i64(i32 %arg) {		define i32 @insert_i64(i32 %arg) {
; CHECK-LABEL: 'insert_i64'		; SSE2-LABEL: 'insert_i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE3-LABEL: 'insert_i64'
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
		; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
		; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSSE3-LABEL: 'insert_i64'
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE41-LABEL: 'insert_i64'
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
		; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'insert_i64'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX-LABEL: 'insert_i64'
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512-LABEL: 'insert_i64'
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'insert_i64'		; BTVER2-LABEL: 'insert_i64'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg		%v2i64_a = insertelement <2 x i64> undef, i64 undef, i32 %arg
%v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0		%v2i64_0 = insertelement <2 x i64> undef, i64 undef, i32 0
%v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1		%v2i64_1 = insertelement <2 x i64> undef, i64 undef, i32 1

%v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg		%v4i64_a = insertelement <4 x i64> undef, i64 undef, i32 %arg
%v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0		%v4i64_0 = insertelement <4 x i64> undef, i64 undef, i32 0
%v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3		%v4i64_3 = insertelement <4 x i64> undef, i64 undef, i32 3

%v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg		%v8i64_a = insertelement <8 x i64> undef, i64 undef, i32 %arg
%v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0		%v8i64_0 = insertelement <8 x i64> undef, i64 undef, i32 0
%v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3		%v8i64_3 = insertelement <8 x i64> undef, i64 undef, i32 3
%v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4		%v8i64_4 = insertelement <8 x i64> undef, i64 undef, i32 4
%v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7		%v8i64_7 = insertelement <8 x i64> undef, i64 undef, i32 7

ret i32 undef		ret i32 undef
}		}

define i32 @insert_i32(i32 %arg) {		define i32 @insert_i32(i32 %arg) {
; CHECK-LABEL: 'insert_i32'		; SSE2-LABEL: 'insert_i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE3-LABEL: 'insert_i32'
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
		; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
		; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
		; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSSE3-LABEL: 'insert_i32'
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE41-LABEL: 'insert_i32'
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
		; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'insert_i32'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX-LABEL: 'insert_i32'
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512-LABEL: 'insert_i32'
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
		; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
		; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
		; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'insert_i32'		; BTVER2-LABEL: 'insert_i32'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = insertelement <4 x i32> undef, i32 undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = insertelement <8 x i32> undef, i32 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = insertelement <8 x i32> undef, i32 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = insertelement <8 x i32> undef, i32 undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_4 = insertelement <8 x i32> undef, i32 undef, i32 4
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i32_7 = insertelement <8 x i32> undef, i32 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = insertelement <16 x i32> undef, i32 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = insertelement <16 x i32> undef, i32 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg		%v2i32_a = insertelement <2 x i32> undef, i32 undef, i32 %arg
%v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0		%v2i32_0 = insertelement <2 x i32> undef, i32 undef, i32 0
%v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1		%v2i32_1 = insertelement <2 x i32> undef, i32 undef, i32 1

%v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg		%v4i32_a = insertelement <4 x i32> undef, i32 undef, i32 %arg
%v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0		%v4i32_0 = insertelement <4 x i32> undef, i32 undef, i32 0
Show All 10 Lines	;
%v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3		%v16i32_3 = insertelement <16 x i32> undef, i32 undef, i32 3
%v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8		%v16i32_8 = insertelement <16 x i32> undef, i32 undef, i32 8
%v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15		%v16i32_15 = insertelement <16 x i32> undef, i32 undef, i32 15

ret i32 undef		ret i32 undef
}		}

define i32 @insert_i16(i32 %arg) {		define i32 @insert_i16(i32 %arg) {
; CHECK-LABEL: 'insert_i16'		; SSE-LABEL: 'insert_i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31		; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX-LABEL: 'insert_i16'
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31
		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512F-LABEL: 'insert_i16'
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512BW-LABEL: 'insert_i16'
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'insert_i16'		; BTVER2-LABEL: 'insert_i16'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> undef, i16 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_8 = insertelement <16 x i16> undef, i16 undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i16_15 = insertelement <16 x i16> undef, i16 undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> undef, i16 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> undef, i16 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> undef, i16 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_8 = insertelement <32 x i16> undef, i16 undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_15 = insertelement <32 x i16> undef, i16 undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg		%v8i16_a = insertelement <8 x i16> undef, i16 undef, i32 %arg
%v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0		%v8i16_0 = insertelement <8 x i16> undef, i16 undef, i32 0
%v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7		%v8i16_7 = insertelement <8 x i16> undef, i16 undef, i32 7

%v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg		%v16i16_a = insertelement <16 x i16> undef, i16 undef, i32 %arg
%v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0		%v16i16_0 = insertelement <16 x i16> undef, i16 undef, i32 0
Show All 9 Lines	;
%v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16		%v32i16_16 = insertelement <32 x i16> undef, i16 undef, i32 16
%v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24		%v32i16_24 = insertelement <32 x i16> undef, i16 undef, i32 24
%v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31		%v32i16_31 = insertelement <32 x i16> undef, i16 undef, i32 31

ret i32 undef		ret i32 undef
}		}

define i32 @insert_i8(i32 %arg) {		define i32 @insert_i8(i32 %arg) {
; CHECK-LABEL: 'insert_i8'		; SSE2-LABEL: 'insert_i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48		; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63		; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE3-LABEL: 'insert_i8'
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; SSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSSE3-LABEL: 'insert_i8'
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE41-LABEL: 'insert_i8'
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; SSE42-LABEL: 'insert_i8'
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX-LABEL: 'insert_i8'
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512F-LABEL: 'insert_i8'
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
		;
		; AVX512BW-LABEL: 'insert_i8'
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
		; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
; BTVER2-LABEL: 'insert_i8'		; BTVER2-LABEL: 'insert_i8'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> undef, i8 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> undef, i8 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> undef, i8 undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> undef, i8 undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_24 = insertelement <32 x i8> undef, i8 undef, i32 24
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i8_31 = insertelement <32 x i8> undef, i8 undef, i32 31
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> undef, i8 undef, i32 %arg
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> undef, i8 undef, i32 0
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> undef, i8 undef, i32 7
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> undef, i8 undef, i32 8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> undef, i8 undef, i32 15
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_24 = insertelement <64 x i8> undef, i8 undef, i32 24
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_31 = insertelement <64 x i8> undef, i8 undef, i32 31
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32		; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> undef, i8 undef, i32 32
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_48 = insertelement <64 x i8> undef, i8 undef, i32 48
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63		; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i8_63 = insertelement <64 x i8> undef, i8 undef, i32 63
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef		; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;		;
%v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg		%v16i8_a = insertelement <16 x i8> undef, i8 undef, i32 %arg
%v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0		%v16i8_0 = insertelement <16 x i8> undef, i8 undef, i32 0
%v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8		%v16i8_8 = insertelement <16 x i8> undef, i8 undef, i32 8
%v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15		%v16i8_15 = insertelement <16 x i8> undef, i8 undef, i32 15

%v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg		%v32i8_a = insertelement <32 x i8> undef, i8 undef, i32 %arg
Show All 20 Lines

llvm/test/Transforms/LoopVectorize/X86/interleaving.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine < %s \| FileCheck %s --check-prefix=NORMAL			; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine < %s \| FileCheck %s --check-prefix=NORMAL
	; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=slm < %s \| FileCheck %s --check-prefix=SLOW			; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=slm < %s \| FileCheck %s --check-prefix=SLOW
	; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=atom < %s \| FileCheck %s --check-prefix=SLOW			; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=atom < %s \| FileCheck %s --check-prefix=SLOW
				lebedev.riUnsubmitted Done Reply Inline Actions This test no longer does what it was doing, i think you want to add some faster cpu runline lebedev.ri: This test no longer does what it was doing, i think you want to add some faster cpu runline

	define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) {			define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) {
	; NORMAL-LABEL: @foo(			; NORMAL-LABEL: @foo(
	; NORMAL-NEXT: entry:			; NORMAL-NEXT: entry:
	; NORMAL-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; NORMAL: vector.ph:
	; NORMAL-NEXT: br label [[VECTOR_BODY:%.*]]
	; NORMAL: vector.body:
	; NORMAL-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; NORMAL-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
	; NORMAL-NEXT: [[TMP1:%.]] = getelementptr inbounds i32, i32 [[B:%.*]], i64 [[TMP0]]
	; NORMAL-NEXT: [[TMP2:%.]] = bitcast i32 [[TMP1]] to <8 x i32>*
	; NORMAL-NEXT: [[WIDE_VEC:%.]] = load <8 x i32>, <8 x i32> [[TMP2]], align 4
	; NORMAL-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
	; NORMAL-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
	; NORMAL-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
	; NORMAL-NEXT: [[TMP4:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i64 [[INDEX]]
	; NORMAL-NEXT: [[TMP5:%.]] = bitcast i32 [[TMP4]] to <4 x i32>*
	; NORMAL-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP5]], align 4
	; NORMAL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
	; NORMAL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
	; NORMAL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
	; NORMAL: middle.block:
	; NORMAL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
	; NORMAL: scalar.ph:
	; NORMAL-NEXT: br label [[FOR_BODY:%.*]]			; NORMAL-NEXT: br label [[FOR_BODY:%.*]]
	; NORMAL: for.cond.cleanup:			; NORMAL: for.cond.cleanup:
	; NORMAL-NEXT: ret void			; NORMAL-NEXT: ret void
	; NORMAL: for.body:			; NORMAL: for.body:
	; NORMAL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !2			; NORMAL-NEXT: [[INDVARS_IV:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
				; NORMAL-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
				; NORMAL-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i32, i32 [[B:%.*]], i64 [[TMP0]]
				; NORMAL-NEXT: [[TMP1:%.]] = load i32, i32 [[ARRAYIDX]], align 4
				; NORMAL-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
				; NORMAL-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i32, i32 [[B]], i64 [[TMP2]]
				; NORMAL-NEXT: [[TMP3:%.]] = load i32, i32 [[ARRAYIDX3]], align 4
				; NORMAL-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
				; NORMAL-NEXT: [[ARRAYIDX6:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i64 [[INDVARS_IV]]
				; NORMAL-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX6]], align 4
				; NORMAL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
				; NORMAL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
				; NORMAL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
	;			;
	; SLOW-LABEL: @foo(			; SLOW-LABEL: @foo(
	; SLOW-NEXT: entry:			; SLOW-NEXT: entry:
	; SLOW-NEXT: br label [[FOR_BODY:%.*]]			; SLOW-NEXT: br label [[FOR_BODY:%.*]]
	; SLOW: for.cond.cleanup:			; SLOW: for.cond.cleanup:
	; SLOW-NEXT: ret void			; SLOW-NEXT: ret void
	; SLOW: for.body:			; SLOW: for.body:
	; SLOW-NEXT: [[INDVARS_IV:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]			; SLOW-NEXT: [[INDVARS_IV:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
	Show All 34 Lines

llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll

	Show All 12 Lines
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[IDXPROM:%.]] = sext i32 [[I:%.]] to i64			; CHECK-NEXT: [[IDXPROM:%.]] = sext i32 [[I:%.]] to i64
	; CHECK-NEXT: [[IDXPROM5:%.]] = sext i32 [[J:%.]] to i64			; CHECK-NEXT: [[IDXPROM5:%.]] = sext i32 [[J:%.]] to i64
	; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]			; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_PHI:%.]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[VEC_PHI:%.]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0			; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
	; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer			; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
	; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>			; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
	; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0			; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
	; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1			; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
	; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2			; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
	; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3			; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
	; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4			; CHECK-NEXT: [[TMP4:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
	; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5			; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i32, i32 [[TMP4]], i32 0
	; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6			; CHECK-NEXT: [[TMP6:%.]] = bitcast i32 [[TMP5]] to <4 x i32>*
	; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7			; CHECK-NEXT: [[WIDE_LOAD:%.]] = load <4 x i32>, <4 x i32> [[TMP6]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP8:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]			; CHECK-NEXT: [[TMP7:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
	; CHECK-NEXT: [[TMP9:%.]] = getelementptr inbounds i32, i32 [[TMP8]], i32 0			; CHECK-NEXT: [[TMP8:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
	; CHECK-NEXT: [[TMP10:%.]] = bitcast i32 [[TMP9]] to <8 x i32>*			; CHECK-NEXT: [[TMP9:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
	; CHECK-NEXT: [[WIDE_LOAD:%.]] = load <8 x i32>, <8 x i32> [[TMP10]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP10:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
	; CHECK-NEXT: [[TMP11:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP7]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP12:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP12:%.]] = load i32, i32 [[TMP8]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP13:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP13:%.]] = load i32, i32 [[TMP9]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP14:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP14:%.]] = load i32, i32 [[TMP10]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP15:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP4]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0
	; CHECK-NEXT: [[TMP16:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP5]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 1
	; CHECK-NEXT: [[TMP17:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP6]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP13]], i32 2
	; CHECK-NEXT: [[TMP18:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[TMP7]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP14]], i32 3
	; CHECK-NEXT: [[TMP19:%.]] = load i32, i32 [[TMP11]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP18]], [[WIDE_LOAD]]
	; CHECK-NEXT: [[TMP20:%.]] = load i32, i32 [[TMP12]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4>
	; CHECK-NEXT: [[TMP21:%.]] = load i32, i32 [[TMP13]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP21]] = add <4 x i32> [[TMP20]], [[TMP19]]
	; CHECK-NEXT: [[TMP22:%.]] = load i32, i32 [[TMP14]], align 4, !tbaa !1			; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
	; CHECK-NEXT: [[TMP23:%.]] = load i32, i32 [[TMP15]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
	; CHECK-NEXT: [[TMP24:%.]] = load i32, i32 [[TMP16]], align 4, !tbaa !1			; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
	; CHECK-NEXT: [[TMP25:%.]] = load i32, i32 [[TMP17]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP26:%.]] = load i32, i32 [[TMP18]], align 4, !tbaa !1
	; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
	; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP20]], i32 1
	; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP21]], i32 2
	; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP22]], i32 3
	; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP23]], i32 4
	; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP24]], i32 5
	; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP25]], i32 6
	; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP33]], i32 [[TMP26]], i32 7
	; CHECK-NEXT: [[TMP35:%.*]] = mul nsw <8 x i32> [[TMP34]], [[WIDE_LOAD]]
	; CHECK-NEXT: [[TMP36:%.*]] = add <8 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
	; CHECK-NEXT: [[TMP37]] = add <8 x i32> [[TMP36]], [[TMP35]]
	; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
	; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
	; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP37]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>			; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
	; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP37]], [[RDX_SHUF]]			; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP21]], [[RDX_SHUF]]
	; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
	; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]			; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
	; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>			; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
	; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100
	; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 96
	; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]			; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
	; CHECK-NEXT: br label [[FOR_BODY:%.*]]			; CHECK-NEXT: br label [[FOR_BODY:%.*]]
	; CHECK: for.cond.cleanup:			; CHECK: for.cond.cleanup:
	; CHECK-NEXT: [[ADD7_LCSSA:%.]] = phi i32 [ [[ADD7:%.]], [[FOR_BODY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]			; CHECK-NEXT: [[ADD7_LCSSA:%.]] = phi i32 [ [[ADD7:%.]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
	; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]			; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]
	; CHECK: for.body:			; CHECK: for.body:
	; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.]], [[FOR_BODY]] ]			; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.]], [[FOR_BODY]] ]
	; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]			; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]
	; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: [[TMP40:%.]] = load i32, i32 [[ARRAYIDX2]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP24:%.]] = load i32, i32 [[ARRAYIDX2]], align 4, !tbaa !1
	; CHECK-NEXT: [[ARRAYIDX6:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]			; CHECK-NEXT: [[ARRAYIDX6:%.]] = getelementptr inbounds [100 x i32], [100 x i32] [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
	; CHECK-NEXT: [[TMP41:%.]] = load i32, i32 [[ARRAYIDX6]], align 4, !tbaa !1			; CHECK-NEXT: [[TMP25:%.]] = load i32, i32 [[ARRAYIDX6]], align 4, !tbaa !1
	; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP41]], [[TMP40]]			; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], [[TMP24]]
	; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4			; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4
	; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]			; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]
	; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1			; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !7			; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !7
	;			;
	entry:			entry:
	%idxprom = sext i32 %i to i64			%idxprom = sext i32 %i to i64
	Show All 32 Lines

llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll

Show First 20 Lines • Show All 241 Lines • ▼ Show 20 Lines	;
%r4 = insertelement <8 x float> %r3, float %ac4, i32 4		%r4 = insertelement <8 x float> %r3, float %ac4, i32 4
%r5 = insertelement <8 x float> %r4, float %ac5, i32 5		%r5 = insertelement <8 x float> %r4, float %ac5, i32 5
%r6 = insertelement <8 x float> %r5, float %ac6, i32 6		%r6 = insertelement <8 x float> %r5, float %ac6, i32 6
%r7 = insertelement <8 x float> %r6, float %ac7, i32 7		%r7 = insertelement <8 x float> %r6, float %ac7, i32 7
ret <8 x float> %r7		ret <8 x float> %r7
}		}

define <8 x i32> @sext_zext(<8 x i16> %a) {		define <8 x i32> @sext_zext(<8 x i16> %a) {
; SSE-LABEL: @sext_zext(		; CHECK-LABEL: @sext_zext(
; SSE-NEXT: [[TMP1:%.]] = sext <8 x i16> [[A:%.]] to <8 x i32>		; CHECK-NEXT: [[TMP1:%.]] = sext <8 x i16> [[A:%.]] to <8 x i32>
; SSE-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>		; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>		; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <8 x i32> [[R7]]		; CHECK-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @sext_zext(
; SLM-NEXT: [[A0:%.]] = extractelement <8 x i16> [[A:%.]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i16> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i32 3
; SLM-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i32 4
; SLM-NEXT: [[A5:%.*]] = extractelement <8 x i16> [[A]], i32 5
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i32 7
; SLM-NEXT: [[AB0:%.*]] = sext i16 [[A0]] to i32
; SLM-NEXT: [[AB1:%.*]] = sext i16 [[A1]] to i32
; SLM-NEXT: [[AB2:%.*]] = sext i16 [[A2]] to i32
; SLM-NEXT: [[AB3:%.*]] = sext i16 [[A3]] to i32
; SLM-NEXT: [[AB4:%.*]] = zext i16 [[A4]] to i32
; SLM-NEXT: [[AB5:%.*]] = zext i16 [[A5]] to i32
; SLM-NEXT: [[AB6:%.*]] = zext i16 [[A6]] to i32
; SLM-NEXT: [[AB7:%.*]] = zext i16 [[A7]] to i32
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX-LABEL: @sext_zext(
; AVX-NEXT: [[TMP1:%.]] = sext <8 x i16> [[A:%.]] to <8 x i32>
; AVX-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @sext_zext(
; AVX512-NEXT: [[TMP1:%.]] = sext <8 x i16> [[A:%.]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
;		;
%a0 = extractelement <8 x i16> %a, i32 0		%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1		%a1 = extractelement <8 x i16> %a, i32 1
%a2 = extractelement <8 x i16> %a, i32 2		%a2 = extractelement <8 x i16> %a, i32 2
%a3 = extractelement <8 x i16> %a, i32 3		%a3 = extractelement <8 x i16> %a, i32 3
%a4 = extractelement <8 x i16> %a, i32 4		%a4 = extractelement <8 x i16> %a, i32 4
%a5 = extractelement <8 x i16> %a, i32 5		%a5 = extractelement <8 x i16> %a, i32 5
%a6 = extractelement <8 x i16> %a, i32 6		%a6 = extractelement <8 x i16> %a, i32 6
▲ Show 20 Lines • Show All 119 Lines • ▼ Show 20 Lines
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3		; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4		; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5		; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6		; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7		; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; SLM-NEXT: ret <8 x float> [[R7]]		; SLM-NEXT: ret <8 x float> [[R7]]
;		;
; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8(		; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
; AVX-NEXT: [[A0:%.]] = extractelement <4 x i32> [[A:%.]], i32 0
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
; AVX-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; AVX-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
; AVX-NEXT: [[B0:%.]] = extractelement <8 x i16> [[B:%.]], i32 0		; AVX-NEXT: [[B0:%.]] = extractelement <8 x i16> [[B:%.]], i32 0
; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1		; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
; AVX-NEXT: [[C0:%.]] = extractelement <16 x i8> [[C:%.]], i32 0		; AVX-NEXT: [[C0:%.]] = extractelement <16 x i8> [[C:%.]], i32 0
; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1		; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1
; AVX-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float		; AVX-NEXT: [[TMP1:%.]] = sitofp <4 x i32> [[A:%.]] to <4 x float>
; AVX-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float		; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float>
; AVX-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float
; AVX-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float
; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float		; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float
; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float		; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float
; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float		; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float
; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float		; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float
; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0		; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1		; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP3]], i32 0
; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2		; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3		; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP4]], i32 1
		; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
		; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP5]], i32 2
		; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
		; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3
; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4		; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4
; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5		; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5
; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6		; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6
; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7		; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7
; AVX-NEXT: ret <8 x float> [[R7]]		; AVX-NEXT: ret <8 x float> [[R7]]
;		;
; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8(		; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8(
; AVX512-NEXT: [[B0:%.]] = extractelement <8 x i16> [[B:%.]], i32 0		; AVX512-NEXT: [[B0:%.]] = extractelement <8 x i16> [[B:%.]], i32 0
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	;
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4		%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5		%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6		%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7		%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7		ret <8 x float> %r7
}		}

define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {		define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @fmul_fdiv_v8f32(		; CHECK-LABEL: @fmul_fdiv_v8f32(
; SSE-NEXT: [[TMP1:%.]] = fmul <8 x float> [[A:%.]], [[B:%.*]]		; CHECK-NEXT: [[TMP1:%.]] = fmul <8 x float> [[A:%.]], [[B:%.*]]
; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]		; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>		; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; SSE-NEXT: ret <8 x float> [[R7]]		; CHECK-NEXT: ret <8 x float> [[R7]]
;
; SLM-LABEL: @fmul_fdiv_v8f32(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.]] = shufflevector <8 x float> [[B:%.]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[B]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
; SLM-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; SLM-NEXT: [[TMP9:%.*]] = fdiv <4 x float> [[TMP5]], [[TMP6]]
; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; SLM-NEXT: [[R3:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP3]], <8 x i32> <i32 4, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; SLM-NEXT: [[R4:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 undef, i32 undef, i32 undef>
; SLM-NEXT: [[R6:%.*]] = shufflevector <8 x float> [[R4]], <8 x float> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 9, i32 10, i32 undef>
; SLM-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[R6]], <8 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
; SLM-NEXT: ret <8 x float> [[R7]]
;
; AVX-LABEL: @fmul_fdiv_v8f32(
; AVX-NEXT: [[TMP1:%.]] = fmul <8 x float> [[A:%.]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX-NEXT: ret <8 x float> [[R7]]
;
; AVX512-LABEL: @fmul_fdiv_v8f32(
; AVX512-NEXT: [[TMP1:%.]] = fmul <8 x float> [[A:%.]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; AVX512-NEXT: ret <8 x float> [[R7]]
;		;
%a0 = extractelement <8 x float> %a, i32 0		%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1		%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2		%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3		%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4		%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5		%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6		%a6 = extractelement <8 x float> %a, i32 6
Show All 26 Lines
}		}

define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {		define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) {
; SSE-LABEL: @fmul_fdiv_v4f32_const(		; SSE-LABEL: @fmul_fdiv_v4f32_const(
; SSE-NEXT: [[TMP1:%.]] = fmul <4 x float> [[A:%.]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>		; SSE-NEXT: [[TMP1:%.]] = fmul <4 x float> [[A:%.]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; SSE-NEXT: ret <4 x float> [[TMP1]]		; SSE-NEXT: ret <4 x float> [[TMP1]]
;		;
; SLM-LABEL: @fmul_fdiv_v4f32_const(		; SLM-LABEL: @fmul_fdiv_v4f32_const(
; SLM-NEXT: [[A0:%.]] = extractelement <4 x float> [[A:%.]], i32 0		; SLM-NEXT: [[A2:%.]] = extractelement <4 x float> [[A:%.]], i32 2
; SLM-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3		; SLM-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; SLM-NEXT: [[AB0:%.*]] = fmul float [[A0]], 2.000000e+00		; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
		; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 1.000000e+00>
; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00		; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00
; SLM-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[AB0]], i32 0		; SLM-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[A1]], i32 1		; SLM-NEXT: [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
		; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
		; SLM-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[TMP4]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2		; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[R1]], float [[A2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3		; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i32 3
; SLM-NEXT: ret <4 x float> [[R3]]		; SLM-NEXT: ret <4 x float> [[R3]]
;		;
; AVX-LABEL: @fmul_fdiv_v4f32_const(		; AVX-LABEL: @fmul_fdiv_v4f32_const(
; AVX-NEXT: [[TMP1:%.]] = fmul <4 x float> [[A:%.]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>		; AVX-NEXT: [[TMP1:%.]] = fmul <4 x float> [[A:%.]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX-NEXT: ret <4 x float> [[TMP1]]		; AVX-NEXT: ret <4 x float> [[TMP1]]
;		;
Show All 18 Lines

llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll

Show First 20 Lines • Show All 95 Lines • ▼ Show 20 Lines	;
%r0 = insertelement <4 x i32> undef, i32 %ab0, i32 0		%r0 = insertelement <4 x i32> undef, i32 %ab0, i32 0
%r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1		%r1 = insertelement <4 x i32> %r0, i32 %ab1, i32 1
%r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2		%r2 = insertelement <4 x i32> %r1, i32 %ab2, i32 2
%r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3		%r3 = insertelement <4 x i32> %r2, i32 %ab3, i32 3
ret <4 x i32> %r3		ret <4 x i32> %r3
}		}

define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_shl_v8i32(		; CHECK-LABEL: @ashr_shl_v8i32(
; SSE-NEXT: [[A0:%.]] = extractelement <8 x i32> [[A:%.]], i32 0		; CHECK-NEXT: [[TMP1:%.]] = ashr <8 x i32> [[A:%.]], [[B:%.*]]
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1		; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2		; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3		; CHECK-NEXT: ret <8 x i32> [[R7]]
; SSE-NEXT: [[B0:%.]] = extractelement <8 x i32> [[B:%.]], i32 0
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; SSE-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
; SSE-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
; SSE-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[R3]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @ashr_shl_v8i32(
; SLM-NEXT: [[TMP1:%.]] = ashr <8 x i32> [[A:%.]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX-LABEL: @ashr_shl_v8i32(
; AVX-NEXT: [[TMP1:%.]] = ashr <8 x i32> [[A:%.]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @ashr_shl_v8i32(
; AVX512-NEXT: [[TMP1:%.]] = ashr <8 x i32> [[A:%.]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
;		;
%a0 = extractelement <8 x i32> %a, i32 0		%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1		%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2		%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3		%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4		%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5		%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6		%a6 = extractelement <8 x i32> %a, i32 6
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines	;
%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7		%r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
ret <8 x i32> %r7		ret <8 x i32> %r7
}		}

define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_lshr_shl_v8i32(		; SSE-LABEL: @ashr_lshr_shl_v8i32(
; SSE-NEXT: [[A0:%.]] = extractelement <8 x i32> [[A:%.]], i32 0		; SSE-NEXT: [[A0:%.]] = extractelement <8 x i32> [[A:%.]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1		; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
; SSE-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
; SSE-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6		; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7		; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; SSE-NEXT: [[B0:%.]] = extractelement <8 x i32> [[B:%.]], i32 0		; SSE-NEXT: [[B0:%.]] = extractelement <8 x i32> [[B:%.]], i32 0
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1		; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
; SSE-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
; SSE-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6		; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7		; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]		; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]		; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; SSE-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]		; SSE-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
; SSE-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
; SSE-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]		; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]		; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0		; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1		; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2		; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3		; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4		; SSE-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5		; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
		; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
		; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
		; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
		; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6		; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7		; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SSE-NEXT: ret <8 x i32> [[R7]]		; SSE-NEXT: ret <8 x i32> [[R7]]
;		;
; SLM-LABEL: @ashr_lshr_shl_v8i32(		; SLM-LABEL: @ashr_lshr_shl_v8i32(
; SLM-NEXT: [[A0:%.]] = extractelement <8 x i32> [[A:%.]], i32 0		; SLM-NEXT: [[A0:%.]] = extractelement <8 x i32> [[A:%.]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1		; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6		; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
▲ Show 20 Lines • Show All 259 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll

	Show All 13 Lines
	@cle32 = external unnamed_addr global [32 x i32], align 16			@cle32 = external unnamed_addr global [32 x i32], align 16


	; Check that we correctly detect a splat/broadcast by leveraging the			; Check that we correctly detect a splat/broadcast by leveraging the
	; commutativity property of `xor`.			; commutativity property of `xor`.

	define void @splat(i8 %a, i8 %b, i8 %c) {			define void @splat(i8 %a, i8 %b, i8 %c) {
	; CHECK-LABEL: @splat(			; CHECK-LABEL: @splat(
	; CHECK-NEXT: [[TMP1:%.]] = insertelement <16 x i8> undef, i8 [[C:%.]], i32 0			; CHECK-NEXT: [[TMP1:%.]] = xor i8 [[C:%.]], [[A:%.*]]
	; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[C]], i32 1			; CHECK-NEXT: store i8 [[TMP1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
	; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[C]], i32 2			; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[A]], [[C]]
	; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[C]], i32 3			; CHECK-NEXT: store i8 [[TMP2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
	; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[C]], i32 4			; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[A]], [[C]]
	; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[C]], i32 5			; CHECK-NEXT: store i8 [[TMP3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
	; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[C]], i32 6			; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[A]], [[C]]
	; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 [[C]], i32 7			; CHECK-NEXT: store i8 [[TMP4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
	; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[C]], i32 8			; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[C]], [[A]]
	; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP9]], i8 [[C]], i32 9			; CHECK-NEXT: store i8 [[TMP5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
	; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i8> [[TMP10]], i8 [[C]], i32 10			; CHECK-NEXT: [[TMP6:%.]] = xor i8 [[C]], [[B:%.]]
	; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> [[TMP11]], i8 [[C]], i32 11			; CHECK-NEXT: store i8 [[TMP6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
	; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i8> [[TMP12]], i8 [[C]], i32 12			; CHECK-NEXT: [[TMP7:%.*]] = xor i8 [[C]], [[A]]
	; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13			; CHECK-NEXT: store i8 [[TMP7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
	; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14			; CHECK-NEXT: [[TMP8:%.*]] = xor i8 [[C]], [[B]]
	; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15			; CHECK-NEXT: store i8 [[TMP8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
	RKSimonAuthorUnsubmitted Done Reply Inline Actions It doesn't look like we were ever detecting this correctly as an insert+broadcast, we were relying on the cheap insert cost. RKSimon: It doesn't look like we were ever detecting this correctly as an insert+broadcast, we were…
	; CHECK-NEXT: [[TMP17:%.]] = insertelement <2 x i8> undef, i8 [[A:%.]], i32 0			; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[A]], [[C]]
	; CHECK-NEXT: [[TMP18:%.]] = insertelement <2 x i8> [[TMP17]], i8 [[B:%.]], i32 1			; CHECK-NEXT: store i8 [[TMP9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
	; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>			; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[A]], [[C]]
	; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]]			; CHECK-NEXT: store i8 [[TMP10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
	; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16			; CHECK-NEXT: [[TMP11:%.*]] = xor i8 [[A]], [[C]]
				; CHECK-NEXT: store i8 [[TMP11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
				; CHECK-NEXT: [[TMP12:%.*]] = xor i8 [[A]], [[C]]
				; CHECK-NEXT: store i8 [[TMP12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
				; CHECK-NEXT: [[TMP13:%.*]] = xor i8 [[A]], [[C]]
				; CHECK-NEXT: store i8 [[TMP13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
				; CHECK-NEXT: [[TMP14:%.*]] = xor i8 [[A]], [[C]]
				; CHECK-NEXT: store i8 [[TMP14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
				; CHECK-NEXT: [[TMP15:%.*]] = xor i8 [[A]], [[C]]
				; CHECK-NEXT: store i8 [[TMP15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
				; CHECK-NEXT: [[TMP16:%.*]] = xor i8 [[A]], [[C]]
				; CHECK-NEXT: store i8 [[TMP16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	%1 = xor i8 %c, %a			%1 = xor i8 %c, %a
	store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16			store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
	%2 = xor i8 %a, %c			%2 = xor i8 %a, %c
	store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)			store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
	%3 = xor i8 %a, %c			%3 = xor i8 %a, %c
	store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)			store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
	Show All 28 Lines



	; Check that we correctly detect that we can have the same opcode on one side by			; Check that we correctly detect that we can have the same opcode on one side by
	; leveraging the commutativity property of `xor`.			; leveraging the commutativity property of `xor`.

	define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {			define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
	; CHECK-LABEL: @same_opcode_on_one_side(			; CHECK-LABEL: @same_opcode_on_one_side(
	; CHECK-NEXT: [[TMP1:%.]] = insertelement <4 x i32> undef, i32 [[C:%.]], i32 0			; CHECK-NEXT: [[ADD1:%.]] = add i32 [[C:%.]], [[A:%.*]]
	; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 1			; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[C]], [[A]]
	; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2			; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[A]], [[C]]
	; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3			; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[C]], [[A]]
	; CHECK-NEXT: [[TMP5:%.]] = insertelement <4 x i32> undef, i32 [[A:%.]], i32 0			; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ADD1]], [[A]]
	; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1			; CHECK-NEXT: store i32 [[TMP1]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
	; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2			; CHECK-NEXT: [[TMP2:%.]] = xor i32 [[B:%.]], [[ADD2]]
	; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3			; CHECK-NEXT: store i32 [[TMP2]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
	; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP4]], [[TMP8]]			; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[C]], [[ADD3]]
	; CHECK-NEXT: [[TMP10:%.]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.]], i32 1			; CHECK-NEXT: store i32 [[TMP3]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
	; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2			; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[A]], [[ADD4]]
	; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3			; CHECK-NEXT: store i32 [[TMP4]], i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
	; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]]
	; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	%add1 = add i32 %c, %a			%add1 = add i32 %c, %a
	%add2 = add i32 %c, %a			%add2 = add i32 %c, %a
	%add3 = add i32 %a, %c			%add3 = add i32 %a, %c
	%add4 = add i32 %c, %a			%add4 = add i32 %c, %a
	%1 = xor i32 %add1, %a			%1 = xor i32 %add1, %a
	store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16			store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
	%2 = xor i32 %b, %add2			%2 = xor i32 %b, %add2
	store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)			store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
	%3 = xor i32 %c, %add3			%3 = xor i32 %c, %add3
	store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)			store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
	%4 = xor i32 %a, %add4			%4 = xor i32 %a, %add4
	store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)			store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
	ret void			ret void
	}			}

llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll

Show All 31 Lines	entry:
ret i32 undef		ret i32 undef
}		}


declare float @llvm.powi.f32(float, i32)		declare float @llvm.powi.f32(float, i32)
define void @fn2(i32* %a, i32* %b, float* %c) {		define void @fn2(i32* %a, i32* %b, float* %c) {
; CHECK-LABEL: @fn2(		; CHECK-LABEL: @fn2(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i32 1		; CHECK-NEXT: [[I0:%.]] = load i32, i32 [[A:%.*]], align 4
; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i32, i32 [[B:%.*]], i32 1		; CHECK-NEXT: [[I1:%.]] = load i32, i32 [[B:%.*]], align 4
		; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
		; CHECK-NEXT: [[FP1:%.*]] = sitofp i32 [[ADD1]] to float
		; CHECK-NEXT: [[CALL1:%.*]] = tail call float @llvm.powi.f32(float [[FP1]], i32 [[ADD1]]) #2
		; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i32, i32 [[A]], i32 1
		; CHECK-NEXT: [[I2:%.]] = load i32, i32 [[ARRAYIDX2]], align 4
		; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i32, i32 [[B]], i32 1
		; CHECK-NEXT: [[I3:%.]] = load i32, i32 [[ARRAYIDX3]], align 4
		; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
		; CHECK-NEXT: [[FP2:%.*]] = sitofp i32 [[ADD2]] to float
		; CHECK-NEXT: [[CALL2:%.*]] = tail call float @llvm.powi.f32(float [[FP2]], i32 [[ADD1]]) #2
; CHECK-NEXT: [[ARRAYIDX4:%.]] = getelementptr inbounds i32, i32 [[A]], i32 2		; CHECK-NEXT: [[ARRAYIDX4:%.]] = getelementptr inbounds i32, i32 [[A]], i32 2
		; CHECK-NEXT: [[I4:%.]] = load i32, i32 [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ARRAYIDX5:%.]] = getelementptr inbounds i32, i32 [[B]], i32 2		; CHECK-NEXT: [[ARRAYIDX5:%.]] = getelementptr inbounds i32, i32 [[B]], i32 2
		; CHECK-NEXT: [[I5:%.]] = load i32, i32 [[ARRAYIDX5]], align 4
		; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
		; CHECK-NEXT: [[FP3:%.*]] = sitofp i32 [[ADD3]] to float
		; CHECK-NEXT: [[CALL3:%.*]] = tail call float @llvm.powi.f32(float [[FP3]], i32 [[ADD1]]) #2
; CHECK-NEXT: [[ARRAYIDX6:%.]] = getelementptr inbounds i32, i32 [[A]], i32 3		; CHECK-NEXT: [[ARRAYIDX6:%.]] = getelementptr inbounds i32, i32 [[A]], i32 3
; CHECK-NEXT: [[TMP0:%.]] = bitcast i32 [[A]] to <4 x i32>*		; CHECK-NEXT: [[I6:%.]] = load i32, i32 [[ARRAYIDX6]], align 4
; CHECK-NEXT: [[TMP1:%.]] = load <4 x i32>, <4 x i32> [[TMP0]], align 4
; CHECK-NEXT: [[ARRAYIDX7:%.]] = getelementptr inbounds i32, i32 [[B]], i32 3		; CHECK-NEXT: [[ARRAYIDX7:%.]] = getelementptr inbounds i32, i32 [[B]], i32 3
; CHECK-NEXT: [[TMP2:%.]] = bitcast i32 [[B]] to <4 x i32>*		; CHECK-NEXT: [[I7:%.]] = load i32, i32 [[ARRAYIDX7]], align 4
; CHECK-NEXT: [[TMP3:%.]] = load <4 x i32>, <4 x i32> [[TMP2]], align 4		; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]		; CHECK-NEXT: [[FP4:%.*]] = sitofp i32 [[ADD4]] to float
; CHECK-NEXT: [[TMP5:%.*]] = sitofp <4 x i32> [[TMP4]] to <4 x float>		; CHECK-NEXT: [[CALL4:%.*]] = tail call float @llvm.powi.f32(float [[FP4]], i32 [[ADD1]]) #2
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0		; CHECK-NEXT: store float [[CALL1]], float* [[C:%.*]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.powi.v4f32(<4 x float> [[TMP5]], i32 [[TMP6]])		; CHECK-NEXT: [[ARRAYIDX8:%.]] = getelementptr inbounds float, float [[C]], i32 1
; CHECK-NEXT: [[ARRAYIDX8:%.]] = getelementptr inbounds float, float [[C:%.*]], i32 1		; CHECK-NEXT: store float [[CALL2]], float* [[ARRAYIDX8]], align 4
; CHECK-NEXT: [[ARRAYIDX9:%.]] = getelementptr inbounds float, float [[C]], i32 2		; CHECK-NEXT: [[ARRAYIDX9:%.]] = getelementptr inbounds float, float [[C]], i32 2
		; CHECK-NEXT: store float [[CALL3]], float* [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[ARRAYIDX10:%.]] = getelementptr inbounds float, float [[C]], i32 3		; CHECK-NEXT: [[ARRAYIDX10:%.]] = getelementptr inbounds float, float [[C]], i32 3
; CHECK-NEXT: [[TMP8:%.]] = bitcast float [[C]] to <4 x float>*		; CHECK-NEXT: store float [[CALL4]], float* [[ARRAYIDX10]], align 4
; CHECK-NEXT: store <4 x float> [[TMP7]], <4 x float>* [[TMP8]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
%i0 = load i32, i32* %a, align 4		%i0 = load i32, i32* %a, align 4
%i1 = load i32, i32* %b, align 4		%i1 = load i32, i32* %b, align 4
%add1 = add i32 %i0, %i1		%add1 = add i32 %i0, %i1
%fp1 = sitofp i32 %add1 to float		%fp1 = sitofp i32 %add1 to float
%call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone		%call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone
Show All 35 Lines

llvm/test/Transforms/SLPVectorizer/X86/hadd.ll

Show First 20 Lines • Show All 215 Lines • ▼ Show 20 Lines	;
%r01 = insertelement <4 x double> %r00, double %r1, i32 1		%r01 = insertelement <4 x double> %r00, double %r1, i32 1
%r02 = insertelement <4 x double> %r01, double %r2, i32 2		%r02 = insertelement <4 x double> %r01, double %r2, i32 2
%r03 = insertelement <4 x double> %r02, double %r3, i32 3		%r03 = insertelement <4 x double> %r02, double %r3, i32 3
ret <4 x double> %r03		ret <4 x double> %r03
}		}

define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {		define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(		; SSE-LABEL: @test_v8f32(
; SSE-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>		; SSE-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>		; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]		; SSE-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>		; SSE-NEXT: ret <8 x float> [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x float> [[R07]]
;		;
; SLM-LABEL: @test_v8f32(		; SLM-LABEL: @test_v8f32(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>		; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>		; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]		; SLM-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>		; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>		; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]		; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	;
%r04 = insertelement <8 x float> %r03, float %r4, i32 4		%r04 = insertelement <8 x float> %r03, float %r4, i32 4
%r05 = insertelement <8 x float> %r04, float %r5, i32 5		%r05 = insertelement <8 x float> %r04, float %r5, i32 5
%r06 = insertelement <8 x float> %r05, float %r6, i32 6		%r06 = insertelement <8 x float> %r05, float %r6, i32 6
%r07 = insertelement <8 x float> %r06, float %r7, i32 7		%r07 = insertelement <8 x float> %r06, float %r7, i32 7
ret <8 x float> %r07		ret <8 x float> %r07
}		}

define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: @test_v4i64(		; CHECK-LABEL: @test_v4i64(
; SSE-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>		; CHECK-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; SSE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>		; CHECK-NEXT: ret <4 x i64> [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x i64> [[R03]]
;
; SLM-LABEL: @test_v4i64(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; SLM-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @test_v4i64(
; AVX-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX512-LABEL: @test_v4i64(
; AVX512-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX512-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
; AVX512-NEXT: ret <4 x i64> [[TMP3]]
;		;
%a0 = extractelement <4 x i64> %a, i32 0		%a0 = extractelement <4 x i64> %a, i32 0
%a1 = extractelement <4 x i64> %a, i32 1		%a1 = extractelement <4 x i64> %a, i32 1
%a2 = extractelement <4 x i64> %a, i32 2		%a2 = extractelement <4 x i64> %a, i32 2
%a3 = extractelement <4 x i64> %a, i32 3		%a3 = extractelement <4 x i64> %a, i32 3
%b0 = extractelement <4 x i64> %b, i32 0		%b0 = extractelement <4 x i64> %b, i32 0
%b1 = extractelement <4 x i64> %b, i32 1		%b1 = extractelement <4 x i64> %b, i32 1
%b2 = extractelement <4 x i64> %b, i32 2		%b2 = extractelement <4 x i64> %b, i32 2
%b3 = extractelement <4 x i64> %b, i32 3		%b3 = extractelement <4 x i64> %b, i32 3
%r0 = add i64 %a0, %a1		%r0 = add i64 %a0, %a1
%r1 = add i64 %b0, %b1		%r1 = add i64 %b0, %b1
%r2 = add i64 %a2, %a3		%r2 = add i64 %a2, %a3
%r3 = add i64 %b2, %b3		%r3 = add i64 %b2, %b3
%r00 = insertelement <4 x i64> undef, i64 %r0, i32 0		%r00 = insertelement <4 x i64> undef, i64 %r0, i32 0
%r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1		%r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
%r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2		%r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
%r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3		%r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
ret <4 x i64> %r03		ret <4 x i64> %r03
}		}

define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @test_v8i32(		; CHECK-LABEL: @test_v8i32(
; SSE-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>		; CHECK-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>		; CHECK-NEXT: ret <8 x i32> [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R07:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[R07]]
;
; SLM-LABEL: @test_v8i32(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @test_v8i32(
; AVX-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @test_v8i32(
; AVX512-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX512-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]]
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
;		;
%a0 = extractelement <8 x i32> %a, i32 0		%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1		%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2		%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3		%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4		%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5		%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6		%a6 = extractelement <8 x i32> %a, i32 6
▲ Show 20 Lines • Show All 123 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/hsub.ll

Show First 20 Lines • Show All 215 Lines • ▼ Show 20 Lines	;
%r01 = insertelement <4 x double> %r00, double %r1, i32 1		%r01 = insertelement <4 x double> %r00, double %r1, i32 1
%r02 = insertelement <4 x double> %r01, double %r2, i32 2		%r02 = insertelement <4 x double> %r01, double %r2, i32 2
%r03 = insertelement <4 x double> %r02, double %r3, i32 3		%r03 = insertelement <4 x double> %r02, double %r3, i32 3
ret <4 x double> %r03		ret <4 x double> %r03
}		}

define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {		define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: @test_v8f32(		; SSE-LABEL: @test_v8f32(
; SSE-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>		; SSE-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>		; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]		; SSE-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>		; SSE-NEXT: ret <8 x float> [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R07:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x float> [[R07]]
;		;
; SLM-LABEL: @test_v8f32(		; SLM-LABEL: @test_v8f32(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>		; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x float> [[A:%.]], <8 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>		; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>
; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]		; SLM-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]]
; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>		; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>		; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]		; SLM-NEXT: [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	;
%r04 = insertelement <8 x float> %r03, float %r4, i32 4		%r04 = insertelement <8 x float> %r03, float %r4, i32 4
%r05 = insertelement <8 x float> %r04, float %r5, i32 5		%r05 = insertelement <8 x float> %r04, float %r5, i32 5
%r06 = insertelement <8 x float> %r05, float %r6, i32 6		%r06 = insertelement <8 x float> %r05, float %r6, i32 6
%r07 = insertelement <8 x float> %r06, float %r7, i32 7		%r07 = insertelement <8 x float> %r06, float %r7, i32 7
ret <8 x float> %r07		ret <8 x float> %r07
}		}

define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {		define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: @test_v4i64(		; CHECK-LABEL: @test_v4i64(
; SSE-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <2 x i32> <i32 0, i32 4>		; CHECK-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 1, i32 5>		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; SSE-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 2, i32 6>		; CHECK-NEXT: ret <4 x i64> [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> <i32 3, i32 7>
; SSE-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SSE-NEXT: ret <4 x i64> [[R03]]
;
; SLM-LABEL: @test_v4i64(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; SLM-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; SLM-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @test_v4i64(
; AVX-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX512-LABEL: @test_v4i64(
; AVX512-NEXT: [[TMP1:%.]] = shufflevector <4 x i64> [[A:%.]], <4 x i64> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; AVX512-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]]
; AVX512-NEXT: ret <4 x i64> [[TMP3]]
;		;
%a0 = extractelement <4 x i64> %a, i32 0		%a0 = extractelement <4 x i64> %a, i32 0
%a1 = extractelement <4 x i64> %a, i32 1		%a1 = extractelement <4 x i64> %a, i32 1
%a2 = extractelement <4 x i64> %a, i32 2		%a2 = extractelement <4 x i64> %a, i32 2
%a3 = extractelement <4 x i64> %a, i32 3		%a3 = extractelement <4 x i64> %a, i32 3
%b0 = extractelement <4 x i64> %b, i32 0		%b0 = extractelement <4 x i64> %b, i32 0
%b1 = extractelement <4 x i64> %b, i32 1		%b1 = extractelement <4 x i64> %b, i32 1
%b2 = extractelement <4 x i64> %b, i32 2		%b2 = extractelement <4 x i64> %b, i32 2
%b3 = extractelement <4 x i64> %b, i32 3		%b3 = extractelement <4 x i64> %b, i32 3
%r0 = sub i64 %a0, %a1		%r0 = sub i64 %a0, %a1
%r1 = sub i64 %b0, %b1		%r1 = sub i64 %b0, %b1
%r2 = sub i64 %a2, %a3		%r2 = sub i64 %a2, %a3
%r3 = sub i64 %b2, %b3		%r3 = sub i64 %b2, %b3
%r00 = insertelement <4 x i64> undef, i64 %r0, i32 0		%r00 = insertelement <4 x i64> undef, i64 %r0, i32 0
%r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1		%r01 = insertelement <4 x i64> %r00, i64 %r1, i32 1
%r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2		%r02 = insertelement <4 x i64> %r01, i64 %r2, i32 2
%r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3		%r03 = insertelement <4 x i64> %r02, i64 %r3, i32 3
ret <4 x i64> %r03		ret <4 x i64> %r03
}		}

define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {		define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @test_v8i32(		; CHECK-LABEL: @test_v8i32(
; SSE-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 8, i32 10>		; CHECK-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 1, i32 3, i32 9, i32 11>		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]		; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>		; CHECK-NEXT: ret <8 x i32> [[TMP3]]
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], [[TMP5]]
; SSE-NEXT: [[R07:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[R07]]
;
; SLM-LABEL: @test_v8i32(
; SLM-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; SLM-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; SLM-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @test_v8i32(
; AVX-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; AVX-NEXT: ret <8 x i32> [[TMP3]]
;
; AVX512-LABEL: @test_v8i32(
; AVX512-NEXT: [[TMP1:%.]] = shufflevector <8 x i32> [[A:%.]], <8 x i32> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; AVX512-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]]
; AVX512-NEXT: ret <8 x i32> [[TMP3]]
;		;
%a0 = extractelement <8 x i32> %a, i32 0		%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1		%a1 = extractelement <8 x i32> %a, i32 1
%a2 = extractelement <8 x i32> %a, i32 2		%a2 = extractelement <8 x i32> %a, i32 2
%a3 = extractelement <8 x i32> %a, i32 3		%a3 = extractelement <8 x i32> %a, i32 3
%a4 = extractelement <8 x i32> %a, i32 4		%a4 = extractelement <8 x i32> %a, i32 4
%a5 = extractelement <8 x i32> %a, i32 5		%a5 = extractelement <8 x i32> %a, i32 5
%a6 = extractelement <8 x i32> %a, i32 6		%a6 = extractelement <8 x i32> %a, i32 6
▲ Show 20 Lines • Show All 123 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Show First 20 Lines • Show All 208 Lines • ▼ Show 20 Lines	;
%rc = insertelement <4 x float> %rb, float %s2, i32 2		%rc = insertelement <4 x float> %rb, float %s2, i32 2
%rd = insertelement <4 x float> %rc, float %s3, i32 3		%rd = insertelement <4 x float> %rc, float %s3, i32 3
call void @v4f32_user(<4 x float> %rd) #0		call void @v4f32_user(<4 x float> %rd) #0
ret <4 x float> %rd		ret <4 x float> %rd
}		}

; Unused insertelement		; Unused insertelement
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {		define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; BIGTHRESH-LABEL: @simple_select_no_users(		; ANY-LABEL: @simple_select_no_users(
; BIGTHRESH-NEXT: [[C0:%.]] = extractelement <4 x i32> [[C:%.]], i32 0		; ANY-NEXT: [[C0:%.]] = extractelement <4 x i32> [[C:%.]], i32 0
; BIGTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1		; ANY-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; BIGTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2		; ANY-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; BIGTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3		; ANY-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; BIGTHRESH-NEXT: [[A0:%.]] = extractelement <4 x float> [[A:%.]], i32 0		; ANY-NEXT: [[A0:%.]] = extractelement <4 x float> [[A:%.]], i32 0
; BIGTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1		; ANY-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; BIGTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2		; ANY-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; BIGTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3		; ANY-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; BIGTHRESH-NEXT: [[B0:%.]] = extractelement <4 x float> [[B:%.]], i32 0		; ANY-NEXT: [[B0:%.]] = extractelement <4 x float> [[B:%.]], i32 0
; BIGTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1		; ANY-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; BIGTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2		; ANY-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; BIGTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3		; ANY-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; BIGTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0		; ANY-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; BIGTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1		; ANY-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; BIGTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer		; ANY-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; BIGTHRESH-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0		; ANY-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; BIGTHRESH-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1		; ANY-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
; BIGTHRESH-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer		; ANY-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
; BIGTHRESH-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0		; ANY-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
; BIGTHRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1		; ANY-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
; BIGTHRESH-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0		; ANY-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
; BIGTHRESH-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1		; ANY-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
; BIGTHRESH-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]		; ANY-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
; BIGTHRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0		; ANY-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; BIGTHRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1		; ANY-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
; BIGTHRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0		; ANY-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; BIGTHRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1		; ANY-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
; BIGTHRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]		; ANY-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
; BIGTHRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0		; ANY-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
; BIGTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0		; ANY-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0
; BIGTHRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1		; ANY-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
; BIGTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1		; ANY-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1
; BIGTHRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0		; ANY-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0
; BIGTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2		; ANY-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2
; BIGTHRESH-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1		; ANY-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1
; BIGTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3		; ANY-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3
; BIGTHRESH-NEXT: ret <4 x float> [[RD]]		; ANY-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_no_users(
; ZEROTHRESH-NEXT: [[C0:%.]] = extractelement <4 x i32> [[C:%.]], i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; ZEROTHRESH-NEXT: [[A0:%.]] = extractelement <4 x float> [[A:%.]], i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; ZEROTHRESH-NEXT: [[B0:%.]] = extractelement <4 x float> [[B:%.]], i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
; ZEROTHRESH-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
; ZEROTHRESH-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1
; ZEROTHRESH-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
; ZEROTHRESH-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1
; ZEROTHRESH-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
; ZEROTHRESH-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2
; ZEROTHRESH-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3
; ZEROTHRESH-NEXT: ret <4 x float> [[RD]]
;		;
%c0 = extractelement <4 x i32> %c, i32 0		%c0 = extractelement <4 x i32> %c, i32 0
%c1 = extractelement <4 x i32> %c, i32 1		%c1 = extractelement <4 x i32> %c, i32 1
%c2 = extractelement <4 x i32> %c, i32 2		%c2 = extractelement <4 x i32> %c, i32 2
%c3 = extractelement <4 x i32> %c, i32 3		%c3 = extractelement <4 x i32> %c, i32 3
%a0 = extractelement <4 x float> %a, i32 0		%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1		%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2		%a2 = extractelement <4 x float> %a, i32 2
▲ Show 20 Lines • Show All 275 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll

	Show First 20 Lines • Show All 105 Lines • ▼ Show 20 Lines

	define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceable(16) %x) {			define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceable(16) %x) {
	; CHECK-LABEL: @PR16739_byval(			; CHECK-LABEL: @PR16739_byval(
	; CHECK-NEXT: [[T0:%.]] = bitcast <4 x float> [[X:%.]] to i64			; CHECK-NEXT: [[T0:%.]] = bitcast <4 x float> [[X:%.]] to i64
	; CHECK-NEXT: [[T1:%.]] = load i64, i64 [[T0]], align 16			; CHECK-NEXT: [[T1:%.]] = load i64, i64 [[T0]], align 16
	; CHECK-NEXT: [[T2:%.]] = getelementptr inbounds <4 x float>, <4 x float> [[X]], i64 0, i64 2			; CHECK-NEXT: [[T2:%.]] = getelementptr inbounds <4 x float>, <4 x float> [[X]], i64 0, i64 2
	; CHECK-NEXT: [[T3:%.]] = bitcast float [[T2]] to i64*			; CHECK-NEXT: [[T3:%.]] = bitcast float [[T2]] to i64*
	; CHECK-NEXT: [[T4:%.]] = load i64, i64 [[T3]], align 8			; CHECK-NEXT: [[T4:%.]] = load i64, i64 [[T3]], align 8
	; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T1]] to i32
	; CHECK-NEXT: [[T6:%.*]] = bitcast i32 [[T5]] to float
	; CHECK-NEXT: [[T7:%.*]] = insertelement <4 x float> undef, float [[T6]], i32 0
	; CHECK-NEXT: [[T8:%.*]] = lshr i64 [[T1]], 32			; CHECK-NEXT: [[T8:%.*]] = lshr i64 [[T1]], 32
	; CHECK-NEXT: [[T9:%.*]] = trunc i64 [[T8]] to i32			; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[T1]], i32 0
	; CHECK-NEXT: [[T10:%.*]] = bitcast i32 [[T9]] to float			; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[T8]], i32 1
	; CHECK-NEXT: [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[T10]], i32 1			; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
				; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
				; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
				; CHECK-NEXT: [[T7:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
				; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
				; CHECK-NEXT: [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[TMP6]], i32 1
	; CHECK-NEXT: [[T12:%.*]] = trunc i64 [[T4]] to i32			; CHECK-NEXT: [[T12:%.*]] = trunc i64 [[T4]] to i32
	; CHECK-NEXT: [[T13:%.*]] = bitcast i32 [[T12]] to float			; CHECK-NEXT: [[T13:%.*]] = bitcast i32 [[T12]] to float
	; CHECK-NEXT: [[T14:%.*]] = insertelement <4 x float> [[T11]], float [[T13]], i32 2			; CHECK-NEXT: [[T14:%.*]] = insertelement <4 x float> [[T11]], float [[T13]], i32 2
	; CHECK-NEXT: [[T15:%.*]] = insertelement <4 x float> [[T14]], float [[T13]], i32 3			; CHECK-NEXT: [[T15:%.*]] = insertelement <4 x float> [[T14]], float [[T13]], i32 3
	; CHECK-NEXT: ret <4 x float> [[T15]]			; CHECK-NEXT: ret <4 x float> [[T15]]
	;			;
	%t0 = bitcast <4 x float>* %x to i64*			%t0 = bitcast <4 x float>* %x to i64*
	%t1 = load i64, i64* %t0, align 16			%t1 = load i64, i64* %t0, align 16
	▲ Show 20 Lines • Show All 80 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -S -slp-threshold=-6 -slp-vectorizer -instcombine < %s \| FileCheck %s			; RUN: opt -S -slp-threshold=-6 -slp-vectorizer -instcombine < %s \| FileCheck %s

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	; These tests ensure that we do not regress due to PR31243. Note that we set			; These tests ensure that we do not regress due to PR31243. Note that we set
	; the SLP threshold to force vectorization even when not profitable.			; the SLP threshold to force vectorization even when not profitable.

	; When computing minimum sizes, if we can prove the sign bit is zero, we can			; When computing minimum sizes, if we can prove the sign bit is zero, we can
	; zero-extend the roots back to their original sizes.			; zero-extend the roots back to their original sizes.
	;			;
	define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {			define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
	; CHECK-LABEL: @PR31243_zext(			; CHECK-LABEL: @PR31243_zext(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.]] = insertelement <2 x i8> undef, i8 [[V0:%.]], i32 0			; CHECK-NEXT: [[TMP0:%.]] = or i8 [[V0:%.]], 1
	; CHECK-NEXT: [[TMP1:%.]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.]], i32 1			; CHECK-NEXT: [[TMP1:%.]] = or i8 [[V1:%.]], 1
	; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>			; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i64
	; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0			; CHECK-NEXT: [[TMP4:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 [[TMP2]]
	; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i64			; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP1]] to i64
	; CHECK-NEXT: [[TMPE4:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 [[TMP4]]			; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[TMP3]]
	; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1			; CHECK-NEXT: [[TMP6:%.]] = load i8, i8 [[TMP4]], align 1
	; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
	; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[TMP6]]
	; CHECK-NEXT: [[TMP6:%.]] = load i8, i8 [[TMPE4]], align 1
	; CHECK-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP5]], align 1			; CHECK-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP5]], align 1
	; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]			; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]
	; CHECK-NEXT: ret i8 [[TMP8]]			; CHECK-NEXT: ret i8 [[TMP8]]
	;			;
	entry:			entry:
	%tmp0 = zext i8 %v0 to i32			%tmp0 = zext i8 %v0 to i32
	%tmp1 = zext i8 %v1 to i32			%tmp1 = zext i8 %v1 to i32
	%tmp2 = or i32 %tmp0, 1			%tmp2 = or i32 %tmp0, 1
	Show All 17 Lines
	; same (either zero or one) we know that sign-extending from the smaller			; same (either zero or one) we know that sign-extending from the smaller
	; type will result in the same value. Since we don't yet perform this			; type will result in the same value. Since we don't yet perform this
	; optimization, we make the proposed smaller type (i8) larger (i16) to			; optimization, we make the proposed smaller type (i8) larger (i16) to
	; ensure correctness.			; ensure correctness.
	;			;
	define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {			define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, i8* %ptr) {
	; CHECK-LABEL: @PR31243_sext(			; CHECK-LABEL: @PR31243_sext(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.]] = insertelement <2 x i8> undef, i8 [[V0:%.]], i32 0			; CHECK-NEXT: [[TMP0:%.]] = or i8 [[V0:%.]], 1
	; CHECK-NEXT: [[TMP1:%.]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.]], i32 1			; CHECK-NEXT: [[TMP1:%.]] = or i8 [[V1:%.]], 1
	; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>			; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[TMP0]] to i64
	; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i16>			; CHECK-NEXT: [[TMP4:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 [[TMP2]]
	; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0			; CHECK-NEXT: [[TMP3:%.*]] = sext i8 [[TMP1]] to i64
	; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i64			; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[TMP3]]
	; CHECK-NEXT: [[TMP4:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 [[TMP5]]
	; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
	; CHECK-NEXT: [[TMP7:%.*]] = sext i16 [[TMP6]] to i64
	; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[TMP7]]
	; CHECK-NEXT: [[TMP6:%.]] = load i8, i8 [[TMP4]], align 1			; CHECK-NEXT: [[TMP6:%.]] = load i8, i8 [[TMP4]], align 1
	; CHECK-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP5]], align 1			; CHECK-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP5]], align 1
	; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]			; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[TMP6]], [[TMP7]]
	; CHECK-NEXT: ret i8 [[TMP8]]			; CHECK-NEXT: ret i8 [[TMP8]]
	;			;
	entry:			entry:
	%tmp0 = sext i8 %v0 to i32			%tmp0 = sext i8 %v0 to i32
	%tmp1 = sext i8 %v1 to i32			%tmp1 = sext i8 %v1 to i32
	Show All 9 Lines

llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll

	Show All 9 Lines

	; Function Attrs: uwtable			; Function Attrs: uwtable
	define void @_ZN1C10SwitchModeEv() local_unnamed_addr #0 comdat align 2 {			define void @_ZN1C10SwitchModeEv() local_unnamed_addr #0 comdat align 2 {
	; CHECK-LABEL: @_ZN1C10SwitchModeEv(			; CHECK-LABEL: @_ZN1C10SwitchModeEv(
	; CHECK-NEXT: for.body.lr.ph.i:			; CHECK-NEXT: for.body.lr.ph.i:
	; CHECK-NEXT: [[OR_1:%.*]] = or i64 undef, 1			; CHECK-NEXT: [[OR_1:%.*]] = or i64 undef, 1
	; CHECK-NEXT: store i64 [[OR_1]], i64* undef, align 8			; CHECK-NEXT: store i64 [[OR_1]], i64* undef, align 8
	; CHECK-NEXT: [[FOO_1:%.]] = getelementptr inbounds [[CLASS_1:%.]], %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 0			; CHECK-NEXT: [[FOO_1:%.]] = getelementptr inbounds [[CLASS_1:%.]], %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 0
				; CHECK-NEXT: [[FOO_3:%.]] = load i64, i64 [[FOO_1]], align 8
	; CHECK-NEXT: [[FOO_2:%.]] = getelementptr inbounds [[CLASS_1]], %class.1 undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1			; CHECK-NEXT: [[FOO_2:%.]] = getelementptr inbounds [[CLASS_1]], %class.1 undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1
	; CHECK-NEXT: [[TMP0:%.]] = bitcast i64 [[FOO_1]] to <2 x i64>*			; CHECK-NEXT: [[FOO_4:%.]] = load i64, i64 [[FOO_2]], align 8
	; CHECK-NEXT: [[TMP1:%.]] = load <2 x i64>, <2 x i64> [[TMP0]], align 8
	; CHECK-NEXT: [[BAR5:%.]] = load i64, i64 undef, align 8			; CHECK-NEXT: [[BAR5:%.]] = load i64, i64 undef, align 8
	; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> undef, i64 [[OR_1]], i32 0			; CHECK-NEXT: [[AND_2:%.*]] = and i64 [[OR_1]], [[FOO_3]]
	; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[BAR5]], i32 1			; CHECK-NEXT: [[AND_1:%.*]] = and i64 [[BAR5]], [[FOO_4]]
	; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i64> [[TMP3]], [[TMP1]]
	; CHECK-NEXT: [[BAR3:%.]] = getelementptr inbounds [[CLASS_2:%.]], %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 0			; CHECK-NEXT: [[BAR3:%.]] = getelementptr inbounds [[CLASS_2:%.]], %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 0
				; CHECK-NEXT: store i64 [[AND_2]], i64* [[BAR3]], align 8
	; CHECK-NEXT: [[BAR4:%.]] = getelementptr inbounds [[CLASS_2]], %class.2 undef, i64 0, i32 0, i32 0, i32 0, i64 1			; CHECK-NEXT: [[BAR4:%.]] = getelementptr inbounds [[CLASS_2]], %class.2 undef, i64 0, i32 0, i32 0, i32 0, i64 1
	; CHECK-NEXT: [[TMP5:%.]] = bitcast i64 [[BAR3]] to <2 x i64>*			; CHECK-NEXT: store i64 [[AND_1]], i64* [[BAR4]], align 8
	; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP5]], align 8
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	for.body.lr.ph.i:			for.body.lr.ph.i:
	%or.1 = or i64 undef, 1			%or.1 = or i64 undef, 1
	store i64 %or.1, i64* undef, align 8			store i64 %or.1, i64* undef, align 8
	%foo.1 = getelementptr inbounds %class.1, %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 0			%foo.1 = getelementptr inbounds %class.1, %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 0
	%foo.3 = load i64, i64* %foo.1, align 8			%foo.3 = load i64, i64* %foo.1, align 8
	%foo.2 = getelementptr inbounds %class.1, %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1			%foo.2 = getelementptr inbounds %class.1, %class.1* undef, i64 0, i32 0, i32 0, i32 0, i32 0, i64 1
	%foo.4 = load i64, i64* %foo.2, align 8			%foo.4 = load i64, i64* %foo.2, align 8
	%bar5 = load i64, i64* undef, align 8			%bar5 = load i64, i64* undef, align 8
	%and.2 = and i64 %or.1, %foo.3			%and.2 = and i64 %or.1, %foo.3
	%and.1 = and i64 %bar5, %foo.4			%and.1 = and i64 %bar5, %foo.4
	%bar3 = getelementptr inbounds %class.2, %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 0			%bar3 = getelementptr inbounds %class.2, %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 0
	store i64 %and.2, i64* %bar3, align 8			store i64 %and.2, i64* %bar3, align 8
	%bar4 = getelementptr inbounds %class.2, %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 1			%bar4 = getelementptr inbounds %class.2, %class.2* undef, i64 0, i32 0, i32 0, i32 0, i64 1
	store i64 %and.1, i64* %bar4, align 8			store i64 %and.1, i64* %bar4, align 8
	ret void			ret void
	}			}

	; Function Attrs: norecurse nounwind uwtable			; Function Attrs: norecurse nounwind uwtable
	define void @pr35497() local_unnamed_addr #0 {			define void @pr35497() local_unnamed_addr #0 {
	; CHECK-LABEL: @pr35497(			; CHECK-LABEL: @pr35497(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.]] = load i64, i64 undef, align 1			; CHECK-NEXT: [[TMP0:%.]] = load i64, i64 undef, align 1
				; CHECK-NEXT: [[AND:%.*]] = shl i64 [[TMP0]], 2
				; CHECK-NEXT: [[SHL:%.*]] = and i64 [[AND]], 20
	; CHECK-NEXT: [[ADD:%.*]] = add i64 undef, undef			; CHECK-NEXT: [[ADD:%.*]] = add i64 undef, undef
	; CHECK-NEXT: store i64 [[ADD]], i64* undef, align 1			; CHECK-NEXT: store i64 [[ADD]], i64* undef, align 1
	; CHECK-NEXT: [[ARRAYIDX2_1:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 5			; CHECK-NEXT: [[ARRAYIDX2_1:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 5
	; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 1			; CHECK-NEXT: [[AND_1:%.*]] = shl i64 undef, 2
	; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], <i64 2, i64 2>			; CHECK-NEXT: [[SHL_1:%.*]] = and i64 [[AND_1]], 20
	; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 20, i64 20>			; CHECK-NEXT: [[SHR_1:%.*]] = lshr i64 undef, 6
				; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[SHL]], [[SHR_1]]
	; CHECK-NEXT: [[ARRAYIDX2_2:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 4			; CHECK-NEXT: [[ARRAYIDX2_2:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 4
	; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer			; CHECK-NEXT: [[SHR_2:%.*]] = lshr i64 undef, 6
				; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[SHL_1]], [[SHR_2]]
				; CHECK-NEXT: [[AND_4:%.*]] = shl i64 [[ADD]], 2
				; CHECK-NEXT: [[SHL_4:%.*]] = and i64 [[AND_4]], 20
	; CHECK-NEXT: [[ARRAYIDX2_5:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 1			; CHECK-NEXT: [[ARRAYIDX2_5:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 1
	; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1			; CHECK-NEXT: store i64 [[ADD_1]], i64* [[ARRAYIDX2_5]], align 1
	; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[TMP5]], i32 0			; CHECK-NEXT: [[AND_5:%.*]] = shl nuw nsw i64 [[ADD_1]], 2
	; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1			; CHECK-NEXT: [[SHL_5:%.*]] = and i64 [[AND_5]], 20
	; CHECK-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 2, i64 2>			; CHECK-NEXT: [[SHR_5:%.*]] = lshr i64 [[ADD_1]], 6
	; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]], <i64 20, i64 20>			; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i64 [[SHL_4]], [[SHR_5]]
				; CHECK-NEXT: store i64 [[ADD_5]], i64* [[ARRAYIDX2_1]], align 1
	; CHECK-NEXT: [[ARRAYIDX2_6:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 0			; CHECK-NEXT: [[ARRAYIDX2_6:%.]] = getelementptr inbounds [0 x i64], [0 x i64] undef, i64 0, i64 0
	; CHECK-NEXT: [[TMP10:%.]] = bitcast i64 [[ARRAYIDX2_6]] to <2 x i64>*			; CHECK-NEXT: store i64 [[ADD_2]], i64* [[ARRAYIDX2_6]], align 1
	; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1			; CHECK-NEXT: [[SHR_6:%.*]] = lshr i64 [[ADD_2]], 6
	; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0			; CHECK-NEXT: [[ADD_6:%.*]] = add nuw nsw i64 [[SHL_5]], [[SHR_6]]
	; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> undef, i64 [[TMP11]], i32 0			; CHECK-NEXT: store i64 [[ADD_6]], i64* [[ARRAYIDX2_2]], align 1
	; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1
	; CHECK-NEXT: [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6>
	; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]]
	; CHECK-NEXT: [[TMP16:%.]] = bitcast i64 [[ARRAYIDX2_2]] to <2 x i64>*
	; CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP16]], align 1
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	%0 = load i64, i64* undef, align 1			%0 = load i64, i64* undef, align 1
	%and = shl i64 %0, 2			%and = shl i64 %0, 2
	%shl = and i64 %and, 20			%shl = and i64 %and, 20
	%add = add i64 undef, undef			%add = add i64 undef, undef
	store i64 %add, i64* undef, align 1			store i64 %add, i64* undef, align 1
	Show All 24 Lines

llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll

Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines	for.body: ; preds = %for.body, %entry
%add52 = add nsw i32 %add38, %add45		%add52 = add nsw i32 %add38, %add45

; YAML: --- !Passed		; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer		; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: StoresVectorized		; YAML-NEXT: Name: StoresVectorized
; YAML-NEXT: Function: foo		; YAML-NEXT: Function: foo
; YAML-NEXT: Args:		; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '		; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
; YAML-NEXT: - Cost: '-8'		; YAML-NEXT: - Cost: '-5'
; YAML-NEXT: - String: ' and with tree size '		; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '4'		; YAML-NEXT: - TreeSize: '4'

; YAML: --- !Passed		; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer		; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedHorizontalReduction		; YAML-NEXT: Name: VectorizedHorizontalReduction
; YAML-NEXT: Function: foo		; YAML-NEXT: Function: foo
; YAML-NEXT: Args:		; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '		; YAML-NEXT: - String: 'Vectorized horizontal reduction with cost '
; YAML-NEXT: - Cost: '-4'		; YAML-NEXT: - Cost: '-7'
; YAML-NEXT: - String: ' and with tree size '		; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '1'		; YAML-NEXT: - TreeSize: '1'

%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1		%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 8		%exitcond = icmp eq i64 %indvars.iv.next, 8
br i1 %exitcond, label %for.end, label %for.body		br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body		for.end: ; preds = %for.body
ret i32 %add52		ret i32 %add52
}		}

llvm/test/Transforms/SLPVectorizer/X86/resched.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s \| FileCheck %s			; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 < %s \| FileCheck %s

	%"struct.std::array" = type { [32 x i8] }			%"struct.std::array" = type { [32 x i8] }

	; Function Attrs: nounwind uwtable			; Function Attrs: nounwind uwtable
	define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() unnamed_addr #0 align 2 {			define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() unnamed_addr #0 align 2 {
	; CHECK-LABEL: @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(			; CHECK-LABEL: @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: br i1 undef, label [[IF_END50_I:%.]], label [[IF_THEN22_I:%.]]			; CHECK-NEXT: br i1 undef, label [[IF_END50_I:%.]], label [[IF_THEN22_I:%.]]
	; CHECK: if.then22.i:			; CHECK: if.then22.i:
	; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1			; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1
	; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]			; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
	; CHECK-NEXT: [[TMP0:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 0			; CHECK-NEXT: [[TMP0:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 0
				; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SUB_I]] to i8
				; CHECK-NEXT: [[CONV_I_I1199:%.*]] = and i8 [[TMP1]], 1
				; CHECK-NEXT: store i8 [[CONV_I_I1199]], i8* [[TMP0]], align 1
				; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1
				; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_I_I]] to i8
				; CHECK-NEXT: [[CONV_1_I_I:%.*]] = and i8 [[TMP2]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_1_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 1			; CHECK-NEXT: [[ARRAYIDX_I_I7_1_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 1
				; CHECK-NEXT: store i8 [[CONV_1_I_I]], i8* [[ARRAYIDX_I_I7_1_I_I]], align 1
				; CHECK-NEXT: [[SHR_1_I_I:%.*]] = lshr i32 [[CONV31_I]], 2
				; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[SHR_1_I_I]] to i8
				; CHECK-NEXT: [[CONV_2_I_I:%.*]] = and i8 [[TMP3]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_2_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 2			; CHECK-NEXT: [[ARRAYIDX_I_I7_2_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 2
				; CHECK-NEXT: store i8 [[CONV_2_I_I]], i8* [[ARRAYIDX_I_I7_2_I_I]], align 1
				; CHECK-NEXT: [[SHR_2_I_I:%.*]] = lshr i32 [[CONV31_I]], 3
				; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[SHR_2_I_I]] to i8
				; CHECK-NEXT: [[CONV_3_I_I:%.*]] = and i8 [[TMP4]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_3_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 3			; CHECK-NEXT: [[ARRAYIDX_I_I7_3_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 3
				; CHECK-NEXT: store i8 [[CONV_3_I_I]], i8* [[ARRAYIDX_I_I7_3_I_I]], align 1
				; CHECK-NEXT: [[SHR_3_I_I:%.*]] = lshr i32 [[CONV31_I]], 4
				; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[SHR_3_I_I]] to i8
				; CHECK-NEXT: [[CONV_4_I_I:%.*]] = and i8 [[TMP5]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_4_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 4			; CHECK-NEXT: [[ARRAYIDX_I_I7_4_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 4
				; CHECK-NEXT: store i8 [[CONV_4_I_I]], i8* [[ARRAYIDX_I_I7_4_I_I]], align 1
				; CHECK-NEXT: [[SHR_4_I_I:%.*]] = lshr i32 [[CONV31_I]], 5
				; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[SHR_4_I_I]] to i8
				; CHECK-NEXT: [[CONV_5_I_I:%.*]] = and i8 [[TMP6]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_5_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 5			; CHECK-NEXT: [[ARRAYIDX_I_I7_5_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 5
				; CHECK-NEXT: store i8 [[CONV_5_I_I]], i8* [[ARRAYIDX_I_I7_5_I_I]], align 1
				; CHECK-NEXT: [[SHR_5_I_I:%.*]] = lshr i32 [[CONV31_I]], 6
				; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SHR_5_I_I]] to i8
				; CHECK-NEXT: [[CONV_6_I_I:%.*]] = and i8 [[TMP7]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 6			; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 6
				; CHECK-NEXT: store i8 [[CONV_6_I_I]], i8* [[ARRAYIDX_I_I7_6_I_I]], align 1
				; CHECK-NEXT: [[SHR_6_I_I:%.*]] = lshr i32 [[CONV31_I]], 7
				; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[SHR_6_I_I]] to i8
				; CHECK-NEXT: [[CONV_7_I_I:%.*]] = and i8 [[TMP8]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 7			; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 7
	; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[CONV31_I]], i32 0			; CHECK-NEXT: store i8 [[CONV_7_I_I]], i8* [[ARRAYIDX_I_I7_7_I_I]], align 1
	; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1			; CHECK-NEXT: [[SHR_7_I_I:%.*]] = lshr i32 [[CONV31_I]], 8
	; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2			; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_7_I_I]] to i8
	; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3			; CHECK-NEXT: [[CONV_8_I_I:%.*]] = and i8 [[TMP9]], 1
	; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4
	; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5
	; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6
	; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7
	; CHECK-NEXT: [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
	; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 8			; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 8
				; CHECK-NEXT: store i8 [[CONV_8_I_I]], i8* [[ARRAYIDX_I_I7_8_I_I]], align 1
				; CHECK-NEXT: [[SHR_8_I_I:%.*]] = lshr i32 [[CONV31_I]], 9
				; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[SHR_8_I_I]] to i8
				; CHECK-NEXT: [[CONV_9_I_I:%.*]] = and i8 [[TMP10]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 9			; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 9
				; CHECK-NEXT: store i8 [[CONV_9_I_I]], i8* [[ARRAYIDX_I_I7_9_I_I]], align 1
				; CHECK-NEXT: [[SHR_9_I_I:%.*]] = lshr i32 [[CONV31_I]], 10
				; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[SHR_9_I_I]] to i8
				; CHECK-NEXT: [[CONV_10_I_I:%.*]] = and i8 [[TMP11]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 10			; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 10
				; CHECK-NEXT: store i8 [[CONV_10_I_I]], i8* [[ARRAYIDX_I_I7_10_I_I]], align 1
				; CHECK-NEXT: [[SHR_10_I_I:%.*]] = lshr i32 [[CONV31_I]], 11
				; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[SHR_10_I_I]] to i8
				; CHECK-NEXT: [[CONV_11_I_I:%.*]] = and i8 [[TMP12]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 11			; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 11
	; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> undef, i32 [[CONV31_I]], i32 0			; CHECK-NEXT: store i8 [[CONV_11_I_I]], i8* [[ARRAYIDX_I_I7_11_I_I]], align 1
	; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1			; CHECK-NEXT: [[SHR_11_I_I:%.*]] = lshr i32 [[CONV31_I]], 12
	; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2			; CHECK-NEXT: [[TMP13:%.*]] = trunc i32 [[SHR_11_I_I]] to i8
	; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3			; CHECK-NEXT: [[CONV_12_I_I:%.*]] = and i8 [[TMP13]], 1
	; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12>
	; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 12			; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 12
				; CHECK-NEXT: store i8 [[CONV_12_I_I]], i8* [[ARRAYIDX_I_I7_12_I_I]], align 1
	; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13			; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13
				; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[SHR_12_I_I]] to i8
				; CHECK-NEXT: [[CONV_13_I_I:%.*]] = and i8 [[TMP14]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 13			; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 13
				; CHECK-NEXT: store i8 [[CONV_13_I_I]], i8* [[ARRAYIDX_I_I7_13_I_I]], align 1
	; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14			; CHECK-NEXT: [[SHR_13_I_I:%.*]] = lshr i32 [[CONV31_I]], 14
				; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[SHR_13_I_I]] to i8
				; CHECK-NEXT: [[CONV_14_I_I:%.*]] = and i8 [[TMP15]], 1
	; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 14			; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 14
				; CHECK-NEXT: store i8 [[CONV_14_I_I]], i8* [[ARRAYIDX_I_I7_14_I_I]], align 1
	; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15			; CHECK-NEXT: [[SHR_14_I_I:%.*]] = lshr i32 [[CONV31_I]], 15
	; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> undef, i32 [[SUB_I]], i32 0			; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[SHR_14_I_I]] to i8
	; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i32 0			; CHECK-NEXT: [[CONV_15_I_I:%.*]] = and i8 [[TMP16]], 1
	; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP15]], i32 [[TMP16]], i32 1
	; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i32 1
	; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP18]], i32 2
	; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i32 2
	; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP20]], i32 3
	; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i32 3
	; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP22]], i32 4
	; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i32 4
	; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP24]], i32 5
	; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP9]], i32 5
	; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP26]], i32 6
	; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP9]], i32 6
	; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP28]], i32 7
	; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP9]], i32 7
	; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP30]], i32 8
	; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP14]], i32 0
	; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP32]], i32 9
	; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP14]], i32 1
	; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i32> [[TMP33]], i32 [[TMP34]], i32 10
	; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP14]], i32 2
	; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x i32> [[TMP35]], i32 [[TMP36]], i32 11
	; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[TMP14]], i32 3
	; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i32> [[TMP37]], i32 [[TMP38]], i32 12
	; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i32> [[TMP39]], i32 [[SHR_12_I_I]], i32 13
	; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14
	; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15
	; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8>
	; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> [[TMP43]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
	; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 15			; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.]] = getelementptr inbounds %"struct.std::array", %"struct.std::array" undef, i64 0, i32 0, i64 15
	; CHECK-NEXT: [[TMP45:%.]] = bitcast i8 [[TMP0]] to <16 x i8>*			; CHECK-NEXT: store i8 [[CONV_15_I_I]], i8* [[ARRAYIDX_I_I7_15_I_I]], align 1
	; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1
	; CHECK-NEXT: unreachable			; CHECK-NEXT: unreachable
	; CHECK: if.end50.i:			; CHECK: if.end50.i:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	br i1 undef, label %if.end50.i, label %if.then22.i			br i1 undef, label %if.end50.i, label %if.then22.i

	if.then22.i: ; preds = %entry			if.then22.i: ; preds = %entry
	▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/sext.ll

Show All 37 Lines	;
%x0 = sext i8 %i0 to i64		%x0 = sext i8 %i0 to i64
%x1 = sext i8 %i1 to i64		%x1 = sext i8 %i1 to i64
%v0 = insertelement <2 x i64> undef, i64 %x0, i32 0		%v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
%v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1		%v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
ret <2 x i64> %v1		ret <2 x i64> %v1
}		}

define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {		define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE-LABEL: @loadext_4i8_to_4i32(		; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1		; SSE2-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
; SSE-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2		; SSE2-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
; SSE-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3		; SSE2-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
; SSE-NEXT: [[I0:%.]] = load i8, i8 [[P0]], align 1		; SSE2-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
; SSE-NEXT: [[I1:%.]] = load i8, i8 [[P1]], align 1		; SSE2-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
; SSE-NEXT: [[I2:%.]] = load i8, i8 [[P2]], align 1		; SSE2-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; SSE-NEXT: [[I3:%.]] = load i8, i8 [[P3]], align 1		; SSE2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; SSE-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32		; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; SSE-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32		; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; SSE-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32		; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32		; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0		; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1		; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2		; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3		; SSE2-NEXT: ret <4 x i32> [[V3]]
; SSE-NEXT: ret <4 x i32> [[V3]]		;
		; SLM-LABEL: @loadext_4i8_to_4i32(
		; SLM-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
		; SLM-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
		; SLM-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
		; SLM-NEXT: [[I0:%.]] = load i8, i8 [[P0]], align 1
		; SLM-NEXT: [[I1:%.]] = load i8, i8 [[P1]], align 1
		; SLM-NEXT: [[I2:%.]] = load i8, i8 [[P2]], align 1
		; SLM-NEXT: [[I3:%.]] = load i8, i8 [[P3]], align 1
		; SLM-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32
		; SLM-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32
		; SLM-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32
		; SLM-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32
		; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
		; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1
		; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2
		; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3
		; SLM-NEXT: ret <4 x i32> [[V3]]
;		;
; AVX-LABEL: @loadext_4i8_to_4i32(		; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1		; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2		; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3		; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*		; AVX-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
; AVX-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1		; AVX-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>		; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
Show All 39 Lines
; SSE-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64		; SSE-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64
; SSE-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64		; SSE-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64
; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0		; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1		; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3		; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE-NEXT: ret <4 x i64> [[V3]]		; SSE-NEXT: ret <4 x i64> [[V3]]
;		;
; AVX1-LABEL: @loadext_4i8_to_4i64(		; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1		; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2		; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3		; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
; AVX1-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <2 x i8>*		; AVX-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
; AVX1-NEXT: [[TMP2:%.]] = load <2 x i8>, <2 x i8> [[TMP1]], align 1		; AVX-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
; AVX1-NEXT: [[I2:%.]] = load i8, i8 [[P2]], align 1		; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX1-NEXT: [[I3:%.]] = load i8, i8 [[P3]], align 1		; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>		; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX1-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64		; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX1-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64		; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0		; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0		; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1		; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; AVX-NEXT: ret <4 x i64> [[V3]]
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX1-NEXT: ret <4 x i64> [[V3]]
;
; AVX2-LABEL: @loadext_4i8_to_4i64(
; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
; AVX2-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
; AVX2-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX2-NEXT: ret <4 x i64> [[V3]]
;
; AVX512-LABEL: @loadext_4i8_to_4i64(
; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
; AVX512-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
; AVX512-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX512-NEXT: ret <4 x i64> [[V3]]
;		;
%p1 = getelementptr inbounds i8, i8* %p0, i64 1		%p1 = getelementptr inbounds i8, i8* %p0, i64 1
%p2 = getelementptr inbounds i8, i8* %p0, i64 2		%p2 = getelementptr inbounds i8, i8* %p0, i64 2
%p3 = getelementptr inbounds i8, i8* %p0, i64 3		%p3 = getelementptr inbounds i8, i8* %p0, i64 3
%i0 = load i8, i8* %p0, align 1		%i0 = load i8, i8* %p0, align 1
%i1 = load i8, i8* %p1, align 1		%i1 = load i8, i8* %p1, align 1
%i2 = load i8, i8* %p2, align 1		%i2 = load i8, i8* %p2, align 1
%i3 = load i8, i8* %p3, align 1		%i3 = load i8, i8* %p3, align 1
▲ Show 20 Lines • Show All 622 Lines • ▼ Show 20 Lines
; SSE-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64		; SSE-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64
; SSE-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64		; SSE-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64
; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0		; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1		; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3		; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE-NEXT: ret <4 x i64> [[V3]]		; SSE-NEXT: ret <4 x i64> [[V3]]
;		;
; AVX1-LABEL: @loadext_4i16_to_4i64(		; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1		; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2		; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3		; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
; AVX1-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <2 x i16>*		; AVX-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
; AVX1-NEXT: [[TMP2:%.]] = load <2 x i16>, <2 x i16> [[TMP1]], align 1		; AVX-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
; AVX1-NEXT: [[I2:%.]] = load i16, i16 [[P2]], align 1		; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; AVX1-NEXT: [[I3:%.]] = load i16, i16 [[P3]], align 1		; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>		; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX1-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64		; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX1-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64		; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0		; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0		; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1		; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; AVX-NEXT: ret <4 x i64> [[V3]]
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX1-NEXT: ret <4 x i64> [[V3]]
;
; AVX2-LABEL: @loadext_4i16_to_4i64(
; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
; AVX2-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
; AVX2-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX2-NEXT: ret <4 x i64> [[V3]]
;
; AVX512-LABEL: @loadext_4i16_to_4i64(
; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
; AVX512-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
; AVX512-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX512-NEXT: ret <4 x i64> [[V3]]
;		;
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
%i0 = load i16, i16* %p0, align 1		%i0 = load i16, i16* %p0, align 1
%i1 = load i16, i16* %p1, align 1		%i1 = load i16, i16* %p1, align 1
%i2 = load i16, i16* %p2, align 1		%i2 = load i16, i16* %p2, align 1
%i3 = load i16, i16* %p3, align 1		%i3 = load i16, i16* %p3, align 1
▲ Show 20 Lines • Show All 185 Lines • ▼ Show 20 Lines
; SSE-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64		; SSE-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64
; SSE-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64		; SSE-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64
; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0		; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1		; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3		; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SSE-NEXT: ret <4 x i64> [[V3]]		; SSE-NEXT: ret <4 x i64> [[V3]]
;		;
; AVX1-LABEL: @loadext_4i32_to_4i64(		; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1		; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1
; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2		; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2
; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3		; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3
; AVX1-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <2 x i32>*		; AVX-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <4 x i32>*
; AVX1-NEXT: [[TMP2:%.]] = load <2 x i32>, <2 x i32> [[TMP1]], align 1		; AVX-NEXT: [[TMP2:%.]] = load <4 x i32>, <4 x i32> [[TMP1]], align 1
; AVX1-NEXT: [[I2:%.]] = load i32, i32 [[P2]], align 1		; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX1-NEXT: [[I3:%.]] = load i32, i32 [[P3]], align 1		; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX1-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>		; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX1-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64		; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX1-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64		; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0		; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0		; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1		; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1		; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2		; AVX-NEXT: ret <4 x i64> [[V3]]
; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; AVX1-NEXT: ret <4 x i64> [[V3]]
;
; AVX2-LABEL: @loadext_4i32_to_4i64(
; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1
; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2
; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3
; AVX2-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <4 x i32>*
; AVX2-NEXT: [[TMP2:%.]] = load <4 x i32>, <4 x i32> [[TMP1]], align 1
; AVX2-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX2-NEXT: ret <4 x i64> [[V3]]
;
; AVX512-LABEL: @loadext_4i32_to_4i64(
; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1
; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2
; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3
; AVX512-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <4 x i32>*
; AVX512-NEXT: [[TMP2:%.]] = load <4 x i32>, <4 x i32> [[TMP1]], align 1
; AVX512-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX512-NEXT: ret <4 x i64> [[V3]]
;		;
%p1 = getelementptr inbounds i32, i32* %p0, i64 1		%p1 = getelementptr inbounds i32, i32* %p0, i64 1
%p2 = getelementptr inbounds i32, i32* %p0, i64 2		%p2 = getelementptr inbounds i32, i32* %p0, i64 2
%p3 = getelementptr inbounds i32, i32* %p0, i64 3		%p3 = getelementptr inbounds i32, i32* %p0, i64 3
%i0 = load i32, i32* %p0, align 1		%i0 = load i32, i32* %p0, align 1
%i1 = load i32, i32* %p1, align 1		%i1 = load i32, i32* %p1, align 1
%i2 = load i32, i32* %p2, align 1		%i2 = load i32, i32* %p2, align 1
%i3 = load i32, i32* %p3, align 1		%i3 = load i32, i32* %p3, align 1
Show All 10 Lines

llvm/test/Transforms/SLPVectorizer/X86/zext.ll

	Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines
	; SSE-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64			; SSE-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64
	; SSE-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64			; SSE-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64
	; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0			; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
	; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1			; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
	; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
	; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3			; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; SSE-NEXT: ret <4 x i64> [[V3]]			; SSE-NEXT: ret <4 x i64> [[V3]]
	;			;
	; AVX1-LABEL: @loadext_4i8_to_4i64(			; AVX-LABEL: @loadext_4i8_to_4i64(
	; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1			; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
	; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2			; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
	; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3			; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
	; AVX1-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <2 x i8>*			; AVX-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
	; AVX1-NEXT: [[TMP2:%.]] = load <2 x i8>, <2 x i8> [[TMP1]], align 1			; AVX-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
	; AVX1-NEXT: [[I2:%.]] = load i8, i8 [[P2]], align 1			; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
	; AVX1-NEXT: [[I3:%.]] = load i8, i8 [[P3]], align 1			; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX1-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>			; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX1-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64			; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX1-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64			; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0			; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0			; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1			; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; AVX-NEXT: ret <4 x i64> [[V3]]
	; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; AVX1-NEXT: ret <4 x i64> [[V3]]
	;
	; AVX2-LABEL: @loadext_4i8_to_4i64(
	; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
	; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
	; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
	; AVX2-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
	; AVX2-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
	; AVX2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
	; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX2-NEXT: ret <4 x i64> [[V3]]
	;
	; AVX512-LABEL: @loadext_4i8_to_4i64(
	; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i8, i8 [[P0:%.*]], i64 1
	; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 2
	; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i8, i8 [[P0]], i64 3
	; AVX512-NEXT: [[TMP1:%.]] = bitcast i8 [[P0]] to <4 x i8>*
	; AVX512-NEXT: [[TMP2:%.]] = load <4 x i8>, <4 x i8> [[TMP1]], align 1
	; AVX512-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
	; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX512-NEXT: ret <4 x i64> [[V3]]
	;			;
	%p1 = getelementptr inbounds i8, i8* %p0, i64 1			%p1 = getelementptr inbounds i8, i8* %p0, i64 1
	%p2 = getelementptr inbounds i8, i8* %p0, i64 2			%p2 = getelementptr inbounds i8, i8* %p0, i64 2
	%p3 = getelementptr inbounds i8, i8* %p0, i64 3			%p3 = getelementptr inbounds i8, i8* %p0, i64 3
	%i0 = load i8, i8* %p0, align 1			%i0 = load i8, i8* %p0, align 1
	%i1 = load i8, i8* %p1, align 1			%i1 = load i8, i8* %p1, align 1
	%i2 = load i8, i8* %p2, align 1			%i2 = load i8, i8* %p2, align 1
	%i3 = load i8, i8* %p3, align 1			%i3 = load i8, i8* %p3, align 1
	▲ Show 20 Lines • Show All 622 Lines • ▼ Show 20 Lines
	; SSE-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64			; SSE-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64
	; SSE-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64			; SSE-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64
	; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0			; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
	; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1			; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
	; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
	; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3			; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; SSE-NEXT: ret <4 x i64> [[V3]]			; SSE-NEXT: ret <4 x i64> [[V3]]
	;			;
	; AVX1-LABEL: @loadext_4i16_to_4i64(			; AVX-LABEL: @loadext_4i16_to_4i64(
	; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1			; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
	; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2			; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
	; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3			; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
	; AVX1-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <2 x i16>*			; AVX-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
	; AVX1-NEXT: [[TMP2:%.]] = load <2 x i16>, <2 x i16> [[TMP1]], align 1			; AVX-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
	; AVX1-NEXT: [[I2:%.]] = load i16, i16 [[P2]], align 1			; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
	; AVX1-NEXT: [[I3:%.]] = load i16, i16 [[P3]], align 1			; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX1-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>			; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX1-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64			; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX1-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64			; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0			; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0			; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1			; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; AVX-NEXT: ret <4 x i64> [[V3]]
	; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; AVX1-NEXT: ret <4 x i64> [[V3]]
	;
	; AVX2-LABEL: @loadext_4i16_to_4i64(
	; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
	; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
	; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
	; AVX2-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
	; AVX2-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
	; AVX2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
	; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX2-NEXT: ret <4 x i64> [[V3]]
	;
	; AVX512-LABEL: @loadext_4i16_to_4i64(
	; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i16, i16 [[P0:%.*]], i64 1
	; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 2
	; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i16, i16 [[P0]], i64 3
	; AVX512-NEXT: [[TMP1:%.]] = bitcast i16 [[P0]] to <4 x i16>*
	; AVX512-NEXT: [[TMP2:%.]] = load <4 x i16>, <4 x i16> [[TMP1]], align 1
	; AVX512-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
	; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX512-NEXT: ret <4 x i64> [[V3]]
	;			;
	%p1 = getelementptr inbounds i16, i16* %p0, i64 1			%p1 = getelementptr inbounds i16, i16* %p0, i64 1
	%p2 = getelementptr inbounds i16, i16* %p0, i64 2			%p2 = getelementptr inbounds i16, i16* %p0, i64 2
	%p3 = getelementptr inbounds i16, i16* %p0, i64 3			%p3 = getelementptr inbounds i16, i16* %p0, i64 3
	%i0 = load i16, i16* %p0, align 1			%i0 = load i16, i16* %p0, align 1
	%i1 = load i16, i16* %p1, align 1			%i1 = load i16, i16* %p1, align 1
	%i2 = load i16, i16* %p2, align 1			%i2 = load i16, i16* %p2, align 1
	%i3 = load i16, i16* %p3, align 1			%i3 = load i16, i16* %p3, align 1
	▲ Show 20 Lines • Show All 185 Lines • ▼ Show 20 Lines
	; SSE-NEXT: [[X2:%.*]] = zext i32 [[I2]] to i64			; SSE-NEXT: [[X2:%.*]] = zext i32 [[I2]] to i64
	; SSE-NEXT: [[X3:%.*]] = zext i32 [[I3]] to i64			; SSE-NEXT: [[X3:%.*]] = zext i32 [[I3]] to i64
	; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0			; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
	; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1			; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
	; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
	; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3			; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; SSE-NEXT: ret <4 x i64> [[V3]]			; SSE-NEXT: ret <4 x i64> [[V3]]
	;			;
	; AVX1-LABEL: @loadext_4i32_to_4i64(			; AVX-LABEL: @loadext_4i32_to_4i64(
	; AVX1-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1			; AVX-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1
	; AVX1-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2			; AVX-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2
	; AVX1-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3			; AVX-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3
	; AVX1-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <2 x i32>*			; AVX-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <4 x i32>*
	; AVX1-NEXT: [[TMP2:%.]] = load <2 x i32>, <2 x i32> [[TMP1]], align 1			; AVX-NEXT: [[TMP2:%.]] = load <4 x i32>, <4 x i32> [[TMP1]], align 1
	; AVX1-NEXT: [[I2:%.]] = load i32, i32 [[P2]], align 1			; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
	; AVX1-NEXT: [[I3:%.]] = load i32, i32 [[P3]], align 1			; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX1-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>			; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX1-NEXT: [[X2:%.*]] = zext i32 [[I2]] to i64			; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX1-NEXT: [[X3:%.*]] = zext i32 [[I3]] to i64			; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX1-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0			; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX1-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0			; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX1-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1			; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX1-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1			; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX1-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2			; AVX-NEXT: ret <4 x i64> [[V3]]
	; AVX1-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
	; AVX1-NEXT: ret <4 x i64> [[V3]]
	;
	; AVX2-LABEL: @loadext_4i32_to_4i64(
	; AVX2-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1
	; AVX2-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2
	; AVX2-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3
	; AVX2-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <4 x i32>*
	; AVX2-NEXT: [[TMP2:%.]] = load <4 x i32>, <4 x i32> [[TMP1]], align 1
	; AVX2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
	; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX2-NEXT: ret <4 x i64> [[V3]]
	;
	; AVX512-LABEL: @loadext_4i32_to_4i64(
	; AVX512-NEXT: [[P1:%.]] = getelementptr inbounds i32, i32 [[P0:%.*]], i64 1
	; AVX512-NEXT: [[P2:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 2
	; AVX512-NEXT: [[P3:%.]] = getelementptr inbounds i32, i32 [[P0]], i64 3
	; AVX512-NEXT: [[TMP1:%.]] = bitcast i32 [[P0]] to <4 x i32>*
	; AVX512-NEXT: [[TMP2:%.]] = load <4 x i32>, <4 x i32> [[TMP1]], align 1
	; AVX512-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
	; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
	; AVX512-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
	; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
	; AVX512-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
	; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
	; AVX512-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
	; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
	; AVX512-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
	; AVX512-NEXT: ret <4 x i64> [[V3]]
	;			;
	%p1 = getelementptr inbounds i32, i32* %p0, i64 1			%p1 = getelementptr inbounds i32, i32* %p0, i64 1
	%p2 = getelementptr inbounds i32, i32* %p0, i64 2			%p2 = getelementptr inbounds i32, i32* %p0, i64 2
	%p3 = getelementptr inbounds i32, i32* %p0, i64 3			%p3 = getelementptr inbounds i32, i32* %p0, i64 3
	%i0 = load i32, i32* %p0, align 1			%i0 = load i32, i32* %p0, align 1
	%i1 = load i32, i32* %p1, align 1			%i1 = load i32, i32* %p1, align 1
	%i2 = load i32, i32* %p2, align 1			%i2 = load i32, i32* %p2, align 1
	%i3 = load i32, i32* %p3, align 1			%i3 = load i32, i32* %p3, align 1
	Show All 10 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[CostModel][X86] Improve extract/insert element costs (PR43605)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 245924

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/X86/arith-fp.ll

llvm/test/Analysis/CostModel/X86/extend.ll

llvm/test/Analysis/CostModel/X86/fptosi.ll

llvm/test/Analysis/CostModel/X86/fptoui.ll

llvm/test/Analysis/CostModel/X86/fround.ll

llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll

llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll

llvm/test/Analysis/CostModel/X86/load_store.ll

llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll

llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll

llvm/test/Analysis/CostModel/X86/sitofp.ll

llvm/test/Analysis/CostModel/X86/uitofp.ll

llvm/test/Analysis/CostModel/X86/vector-extract.ll

llvm/test/Analysis/CostModel/X86/vector-insert.ll

llvm/test/Transforms/LoopVectorize/X86/interleaving.ll

llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll

llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll

llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll

llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll

llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll

llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll

llvm/test/Transforms/SLPVectorizer/X86/hadd.ll

llvm/test/Transforms/SLPVectorizer/X86/hsub.ll

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll

llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll

llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll

llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll

llvm/test/Transforms/SLPVectorizer/X86/resched.ll

llvm/test/Transforms/SLPVectorizer/X86/sext.ll

llvm/test/Transforms/SLPVectorizer/X86/zext.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CostModel][X86] Improve extract/insert element costs (PR43605)ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 245924

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/test/Analysis/CostModel/X86/arith-fp.ll

llvm/test/Analysis/CostModel/X86/extend.ll

llvm/test/Analysis/CostModel/X86/fptosi.ll

llvm/test/Analysis/CostModel/X86/fptoui.ll

llvm/test/Analysis/CostModel/X86/fround.ll

llvm/test/Analysis/CostModel/X86/insert-extract-at-zero.ll

llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll

llvm/test/Analysis/CostModel/X86/load_store.ll

llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll

llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll

llvm/test/Analysis/CostModel/X86/sitofp.ll

llvm/test/Analysis/CostModel/X86/uitofp.ll

llvm/test/Analysis/CostModel/X86/vector-extract.ll

llvm/test/Analysis/CostModel/X86/vector-insert.ll

llvm/test/Transforms/LoopVectorize/X86/interleaving.ll

llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll

llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll

llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll

llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll

llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll

llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll

llvm/test/Transforms/SLPVectorizer/X86/hadd.ll

llvm/test/Transforms/SLPVectorizer/X86/hsub.ll

llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll

llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll

llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll

llvm/test/Transforms/SLPVectorizer/X86/remark_horcost.ll

llvm/test/Transforms/SLPVectorizer/X86/resched.ll

llvm/test/Transforms/SLPVectorizer/X86/sext.ll

llvm/test/Transforms/SLPVectorizer/X86/zext.ll

[CostModel][X86] Improve extract/insert element costs (PR43605)
ClosedPublic