This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner] reduce buildvec of zexted extracted element to shuffle
ClosedPublic

Authored by spatel on Jan 3 2019, 11:18 AM.

Details

Summary

The motivating case for this is shown in the first regression test. We currently transfer the value from vector to scalar and back rather than just zero-extending in the vector domain with 'vpmovzxdq'.

That's a special case of a more general pattern, as shown here. In all tests, we're avoiding the vector-scalar-vector moves in favor of vector ops.
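
A minimal IR sketch in the spirit of that first test (the function name and exact types here are illustrative, not copied from the patch):

; A scalar is extracted, zero-extended, and placed back into lane 0 of a
; wider-element vector; ideally this lowers to a single vpmovzxdq instead
; of a vector->GPR->vector round trip.
define <2 x i64> @zext_extract_to_buildvec(<4 x i32> %x) {
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}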

I suspect that we aren't producing optimal shuffle code in some cases though, so we may want to limit this patch and/or account for those patterns first. But I figured it was worth posting the larger test diffs, so we can see what's happening and make sure the logic is correct.

If we want to limit this patch but still get that first motivating case, I see 2 possibilities (both sketched below):

  1. Don't handle patterns where we require translating the source element to a different location in the result.
  2. Don't handle patterns with zero elements in the build vector (only deal with undefs there).
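
Hypothetical IR for each of those two pattern classes (again, names and constants are chosen only for illustration):

; 1. The extracted element must move to a different lane of the result:
;    source element 2 becomes result element 0.
define <2 x i64> @zext_extract2_to_lane0(<4 x i32> %x) {
  %e = extractelement <4 x i32> %x, i32 2
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> undef, i64 %z, i32 0
  ret <2 x i64> %r
}

; 2. The other build vector elements are zero rather than undef.
define <2 x i64> @zext_extract0_into_zero_vec(<4 x i32> %x) {
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
  ret <2 x i64> %r
}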

Diff Detail

Event Timeline

spatel created this revision.Jan 3 2019, 11:18 AM
spatel updated this revision to Diff 181103.Jan 10 2019, 10:56 AM

Patch updated:
I reduced the reach of this patch to only handle a build vector of undefs plus one non-undef element. If x86 isn't lowering the other cases optimally, then other targets may not be either, even if there's no regression test evidence of that.

So now, these should be mostly clear wins. Not sure about some of the SSE2-only shuffling.

RKSimon added inline comments.Jan 13 2019, 4:24 AM
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
16136

Should ANY_EXTEND be handled as well? SimplifyDemandedBits can reduce ZERO_EXTEND -> ANY_EXTEND more aggressively these days.
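
ANY_EXTEND has no direct IR spelling, but a hypothetical input like the following is the kind of coverage being discussed: only the low bits of the extended lane are demanded, so SimplifyDemandedBits could relax the ZERO_EXTEND. It is unverified whether this actually reaches the combine as ANY_EXTEND; it is only a sketch.

; The mask keeps only the low 32 bits of lane 0, so the extended high
; bits of the zext are never demanded.
define <2 x i64> @maybe_any_extend(<4 x i32> %x) {
  %e = extractelement <4 x i32> %x, i32 0
  %z = zext i32 %e to i64
  %i = insertelement <2 x i64> undef, i64 %z, i32 0
  %r = and <2 x i64> %i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %r
}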

test/CodeGen/X86/buildvec-extract.ll
422

Please can you investigate what's happening here? The xmm0[6,7] at the end seems really weird....

I also wonder if we have much crossover with PR39975 (truncate(extract()) -> extract(bitcast())) to handle TRUNCATE as well as *_EXTEND
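
For reference, a hypothetical little-endian example of that PR39975 shape (indices chosen only for illustration):

; trunc(extractelement) of an i32 lane down to i16.
define i16 @trunc_extract(<4 x i32> %x) {
  %e = extractelement <4 x i32> %x, i32 1
  %t = trunc i32 %e to i16
  ret i16 %t
}
; On a little-endian target this is equivalent to extracting straight
; from a bitcast of the source:
;   %b = bitcast <4 x i32> %x to <8 x i16>
;   %t = extractelement <8 x i16> %b, i32 2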

spatel marked 2 inline comments as done.Jan 14 2019, 8:31 AM
spatel added inline comments.
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
16136

That seems ok, and I can update with a draft of that change. But any idea how to make a test that would provide coverage for that pattern? I'm not seeing any existing test diffs. Alternatively, I can add a TODO comment for the enhancement while trying to find a way to make that happen.

test/CodeGen/X86/buildvec-extract.ll
422

Hmm - I didn't notice that before.

There's some shuffle lowering madness that I haven't stepped through yet that creates several nodes before this becomes the single pshufb.

I filed PR40306 to track this:
https://bugs.llvm.org/show_bug.cgi?id=40306

RKSimon added inline comments.Jan 14 2019, 8:40 AM
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
16136

TODO is fine

spatel updated this revision to Diff 181582.Jan 14 2019, 10:17 AM
spatel marked an inline comment as done.

Patch updated:
Added TODO comment for handling ISD::ANY_EXTEND.

spatel updated this revision to Diff 181649.Jan 14 2019, 1:58 PM

Patch updated:
No code changes, but rebased after rL351103 so we get more vector zext'ing codegen.

RKSimon accepted this revision.Jan 15 2019, 1:17 AM

LGTM (with one poor-codegen bug request).

test/CodeGen/X86/buildvec-extract.ll
412

Please can you raise a bug on this - we should do better for this shuffle.

456

Add this shuffle to the same bug - I think it's the same culprit.

This revision is now accepted and ready to land.Jan 15 2019, 1:17 AM
spatel marked 2 inline comments as done.Jan 15 2019, 7:47 AM
spatel added inline comments.
test/CodeGen/X86/buildvec-extract.ll
412
This revision was automatically updated to reflect the committed changes.
spatel marked an inline comment as done.

Hello. I am investigating a crash and this assertion failure on AArch64:

lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1548: llvm::SDValue llvm::SelectionDAG::getVectorShuffle(llvm::EVT, const llvm::SDLoc&, llvm::SDValue, llvm::SDValue, llvm::ArrayRef<int>): Assertion `VT.getVectorNumElements() == Mask.size() && "Must have the same number of vector elements as mask elements!"' failed.

Bisecting blames this commit. I've only just started debugging this (it is hitting this new function), but I wanted to share it in case you have any ideas or thoughts. This is my reproducer:

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

@a = common dso_local local_unnamed_addr global <4 x i16> zeroinitializer, align 8
@b = common dso_local local_unnamed_addr global i32 0, align 4

; Function Attrs: nounwind
define dso_local i32 @vqmovns_s32() local_unnamed_addr {
entry:
  %0 = load <4 x i16>, <4 x i16>* @a, align 8
  %1 = extractelement <4 x i16> %0, i32 2
  %vgetq_lane = zext i16 %1 to i32
  %2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0
  %vqmovns_s32 = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %2)
  %3 = extractelement <4 x i16> %vqmovns_s32, i64 0
  %conv = sext i16 %3 to i32
  store i32 %conv, i32* @b, align 4
  ret i32 undef
}

; Function Attrs: nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>)

I see the problem now: I forgot to verify that the build vector and the source vector of the extract element are the same size. Should have a fix committed soon.

Please see:
rL351753
rL351754

Filed a blocking bug for the 8.0 release:
https://bugs.llvm.org/show_bug.cgi?id=40394

Many thanks for your speedy responses and for fixing this!