Diff 83988

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,302 Lines • ▼ Show 20 Lines	static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
// We have to cast V2 around.		// We have to cast V2 around.
MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);		MVT MaskVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,		V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::ANDNP, DL, MaskVT,
DAG.getBitcast(MaskVT, V1Mask),		DAG.getBitcast(MaskVT, V1Mask),
DAG.getBitcast(MaskVT, V2)));		DAG.getBitcast(MaskVT, V2)));
return DAG.getNode(ISD::OR, DL, VT, V1, V2);		return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}		}

		// Forward declaration: the AVX-512 case of lowerVectorShuffleAsBlend below
		// calls this helper with (Op, Mask, PreservedSrc); its definition is not
		// part of this hunk.
		static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
		SDValue PreservedSrc,
		const X86Subtarget &Subtarget,
		SelectionDAG &DAG);

/// \brief Try to emit a blend instruction for a shuffle.		/// \brief Try to emit a blend instruction for a shuffle.
///		///
/// This doesn't do any checks for the availability of instructions for blending		/// This doesn't do any checks for the availability of instructions for blending
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to		/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is		/// be matched in the backend with the type given. What it does check for is
/// that the shuffle mask is a blend, or convertible into a blend with zero.		/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,		static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,		SDValue V2, ArrayRef<int> Original,
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines	for (int i = 0, Size = Mask.size(); i < Size; ++i)
MVT::i8));		MVT::i8));

V1 = DAG.getBitcast(BlendVT, V1);		V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);		V2 = DAG.getBitcast(BlendVT, V2);
return DAG.getBitcast(		return DAG.getBitcast(
VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,		VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,
DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2));		DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2));
}		}
		// 512-bit vector types: express the blend as a masked select built by
		// getVectorMaskingNode, with BlendMask passed as a scalar integer constant.
		case MVT::v16f32:
		case MVT::v8f64:
		case MVT::v8i64:
		case MVT::v16i32:
		case MVT::v32i16:
		case MVT::v64i8: {
		// Integer type with one bit per vector element, but at least 8 bits wide.
		MVT IntegerType =
		MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
		// NOTE(review): for v64i8 IntegerType is i64. BlendMask is declared outside
		// this view - confirm it can hold 64 lane bits; the SKX checks for
		// test_mm512_mask_blend_epi8 load only a 32-bit immediate (0xAAAAAAAA)
		// before kmovq.
		SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
		// NOTE(review): the helper's parameters are (Op, Mask, PreservedSrc), and
		// presumably Op is the value selected where a mask bit is set. If set bits
		// in BlendMask mark lanes taken from V2 (built outside this view - confirm),
		// then V1 and V2 look swapped here: the regenerated checks for
		// shuffle_v8i64_81a3c5e7 merge %zmm1 into the odd lanes (mask $-86), while
		// the shuffle mask <8,1,10,3,...> takes the second input in the even lanes.
		return getVectorMaskingNode(V1, MaskNode, V2, Subtarget, DAG);
		igorbUnsubmitted Done Reply Inline Actions you can use getVectorMaskingNode to simplify the code, all logic already implemented. igorb: you can use getVectorMaskingNode to simplify the code, all logic already implemented.
		}
default:		default:
		igorb (unsubmitted, done): Not in use.
llvm_unreachable("Not a supported integer vector type!");		llvm_unreachable("Not a supported integer vector type!");
}		}
}		}

/// \brief Try to lower as a blend of elements from two inputs followed by		/// \brief Try to lower as a blend of elements from two inputs followed by
/// a single-input permutation.		/// a single-input permutation.
///		///
/// This matches the pattern where we can blend elements from two inputs and		/// This matches the pattern where we can blend elements from two inputs and
▲ Show 20 Lines • Show All 4,408 Lines • ▼ Show 20 Lines	static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Op =		if (SDValue Op =
lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))		lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Op;		return Op;

if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,
V2, DAG, Subtarget))		V2, DAG, Subtarget))
return V;		return V;

		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
		Zeroable, Subtarget, DAG))
		return Blend;

return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.		/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,		static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
Show All 18 Lines	if (V2.isUndef())
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,		return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));		getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

// Use dedicated unpack instructions for masks that match their pattern.		// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue Unpck =		if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return Unpck;		return Unpck;

		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
		Zeroable, Subtarget, DAG))
		return Blend;

// Otherwise, fall back to a SHUFPS sequence.		// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);		return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}		}
// If we have AVX512F support, we can use VEXPAND.		// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))		V1, V2, DAG, Subtarget))
return V;		return V;

▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Unpck =		if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))		lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;		return Unpck;
// If we have AVX512F support, we can use VEXPAND.		// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,
V2, DAG, Subtarget))		V2, DAG, Subtarget))
return V;		return V;

		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
		Zeroable, Subtarget, DAG))
		return Blend;

return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 16-lane 32-bit integer shuffles.		/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,		SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
CastV1, CastV2, DAG);		CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);		return DAG.getBitcast(MVT::v16i32, ShufPS);
}		}
// If we have AVX512F support, we can use VEXPAND.		// If we have AVX512F support, we can use VEXPAND.
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,		if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,
V1, V2, DAG, Subtarget))		V1, V2, DAG, Subtarget))
return V;		return V;

		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
		Zeroable, Subtarget, DAG))
		return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}		}

/// \brief Handle lowering of 32-lane 16-bit integer shuffles.		/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
Show All 30 Lines	if (V2.isUndef()) {
if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {		if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) {
// As this is a single-input shuffle, the repeated mask should be		// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16		// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v32 case.		// lowering to handle even the v32 case.
return lowerV8I16GeneralSingleInputVectorShuffle(		return lowerV8I16GeneralSingleInputVectorShuffle(
DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);		DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
}		}
}		}

		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
		Zeroable, Subtarget, DAG))
		return Blend;

return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
		RKSimon (unsubmitted, done): Unnecessary, as AVX512BW is a requirement for v32i16 - see the assert at the top of the function.
}		}

/// \brief Handle lowering of 64-lane 8-bit integer shuffles.		/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,		static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
const SmallBitVector &Zeroable,		const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,		SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
SelectionDAG &DAG) {		SelectionDAG &DAG) {
Show All 23 Lines	static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(		if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))		DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;		return Rotate;

if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(		if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))		DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;		return PSHUFB;

// VBMI can use VPERMV/VPERMV3 byte shuffles.		// VBMI can use VPERMV/VPERMV3 byte shuffles.
		RKSimon (unsubmitted, done): Unnecessary, as AVX512BW is a requirement for v64i8 - see the assert at the top of the function.
if (Subtarget.hasVBMI())		if (Subtarget.hasVBMI())
return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);		return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);

// Try to create an in-lane repeating shuffle mask and then shuffle the		// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.		// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(		if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))		DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;		return V;

		if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
		Zeroable, Subtarget, DAG))
		return Blend;

// FIXME: Implement direct support for this type!		// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);		return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}		}

/// \brief High-level routine to lower various 512-bit x86 vector shuffles.		/// \brief High-level routine to lower various 512-bit x86 vector shuffles.
///		///
/// This routine either breaks down the specific type of a 512-bit x86 vector		/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back		/// shuffle or splits it into two 256-bit shuffles and fuses the results back
▲ Show 20 Lines • Show All 21,645 Lines • Show Last 20 Lines

test/CodeGen/X86/merge-consecutive-loads-512.ll

Show First 20 Lines • Show All 132 Lines • ▼ Show 20 Lines	; X32-AVX512F-NEXT: retl
%res6 = insertelement <8 x double> %res3, double 0.0, i32 6		%res6 = insertelement <8 x double> %res3, double 0.0, i32 6
%res7 = insertelement <8 x double> %res6, double 0.0, i32 7		%res7 = insertelement <8 x double> %res6, double 0.0, i32 7
ret <8 x double> %res7		ret <8 x double> %res7
}		}

define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {		define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:		; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: vmovupd 8(%rdi), %zmm1		; ALL-NEXT: movb $32, %al
; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2		; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: vmovapd {{.*#+}} zmm0 = <0,u,2,u,4,13,u,7>		; ALL-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
; ALL-NEXT: vpermi2pd %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq		; ALL-NEXT: retq
;		;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:		; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F: # BB#0:		; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-AVX512F-NEXT: movl 4(%esp), %eax
; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm1		; X32-AVX512F-NEXT: movb $32, %cl
; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2		; X32-AVX512F-NEXT: kmovw %ecx, %k1
; X32-AVX512F-NEXT: vmovapd {{.*#+}} zmm0 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>		; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT: vpermi2pd %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl		; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1		%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
%ptr2 = getelementptr inbounds double, double* %ptr, i64 3		%ptr2 = getelementptr inbounds double, double* %ptr, i64 3
%ptr4 = getelementptr inbounds double, double* %ptr, i64 5		%ptr4 = getelementptr inbounds double, double* %ptr, i64 5
%ptr7 = getelementptr inbounds double, double* %ptr, i64 8		%ptr7 = getelementptr inbounds double, double* %ptr, i64 8
%val0 = load double, double* %ptr0		%val0 = load double, double* %ptr0
%val2 = load double, double* %ptr2		%val2 = load double, double* %ptr2
%val4 = load double, double* %ptr4		%val4 = load double, double* %ptr4
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	; X32-AVX512F-NEXT: retl
%res6 = insertelement <8 x i64> %res4, i64 0, i32 6		%res6 = insertelement <8 x i64> %res4, i64 0, i32 6
%res7 = insertelement <8 x i64> %res6, i64 0, i32 7		%res7 = insertelement <8 x i64> %res6, i64 0, i32 7
ret <8 x i64> %res7		ret <8 x i64> %res7
}		}

define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {		define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:		; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: vmovdqu64 8(%rdi), %zmm1		; ALL-NEXT: movb $32, %al
; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2		; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,u,2,u,4,13,u,7>		; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
; ALL-NEXT: vpermi2q %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq		; ALL-NEXT: retq
;		;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:		; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F: # BB#0:		; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-AVX512F-NEXT: movl 4(%esp), %eax
; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm1		; X32-AVX512F-NEXT: movb $32, %cl
; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2		; X32-AVX512F-NEXT: kmovw %ecx, %k1
; X32-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>		; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT: vpermi2q %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl		; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1		%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
%ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3		%ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
%ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5		%ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
%ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8		%ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
%val0 = load i64, i64* %ptr0		%val0 = load i64, i64* %ptr0
%val2 = load i64, i64* %ptr2		%val2 = load i64, i64* %ptr2
%val4 = load i64, i64* %ptr4		%val4 = load i64, i64* %ptr4
▲ Show 20 Lines • Show All 194 Lines • ▼ Show 20 Lines	; X32-AVX512F-NEXT: retl
%resE = insertelement <16 x i32> %resC, i32 %valE, i32 14		%resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
%resF = insertelement <16 x i32> %resE, i32 %valF, i32 15		%resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
ret <16 x i32> %resF		ret <16 x i32> %resF
}		}

define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {		define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:		; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: vmovdqu32 (%rdi), %zmm1		; ALL-NEXT: movw $8240, %ax # imm = 0x2030
; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2		; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: vmovdqa32 {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>		; ALL-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; ALL-NEXT: vpermi2d %zmm2, %zmm1, %zmm0
; ALL-NEXT: retq		; ALL-NEXT: retq
;		;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:		; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F: # BB#0:		; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-AVX512F-NEXT: movl 4(%esp), %eax
; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm1		; X32-AVX512F-NEXT: movw $8240, %cx # imm = 0x2030
; X32-AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2		; X32-AVX512F-NEXT: kmovw %ecx, %k1
; X32-AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>		; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT: vpermi2d %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT: retl		; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0		%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
%ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3		%ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
%ptrC = getelementptr inbounds i32, i32* %ptr, i64 12		%ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
%ptrE = getelementptr inbounds i32, i32* %ptr, i64 14		%ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
%ptrF = getelementptr inbounds i32, i32* %ptr, i64 15		%ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
%val0 = load i32, i32* %ptr0		%val0 = load i32, i32* %ptr0
%val3 = load i32, i32* %ptr3		%val3 = load i32, i32* %ptr3
▲ Show 20 Lines • Show All 247 Lines • Show Last 20 Lines

test/CodeGen/X86/sse3-avx-addsub.ll

	Show First 20 Lines • Show All 113 Lines • ▼ Show 20 Lines
	; AVX1-LABEL: test5:			; AVX1-LABEL: test5:
	; AVX1: # BB#0:			; AVX1: # BB#0:
	; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0			; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0
	; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1			; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1
	; AVX1-NEXT: retq			; AVX1-NEXT: retq
	;			;
	; AVX512-LABEL: test5:			; AVX512-LABEL: test5:
	; AVX512: # BB#0:			; AVX512: # BB#0:
	; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2			; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2
	; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm0			; AVX512-NEXT: movw $-21846, %ax # imm = 0xAAAA
	; AVX512-NEXT: vshufps {{.*#+}} zmm0 = zmm0[0,2],zmm2[1,3],zmm0[4,6],zmm2[5,7],zmm0[8,10],zmm2[9,11],zmm0[12,14],zmm2[13,15]			; AVX512-NEXT: kmovw %eax, %k1
	; AVX512-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15]			; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
				; AVX512-NEXT: vmovaps %zmm2, %zmm0
	; AVX512-NEXT: retq			; AVX512-NEXT: retq
	%add = fadd <16 x float> %A, %B			%add = fadd <16 x float> %A, %B
	%sub = fsub <16 x float> %A, %B			%sub = fsub <16 x float> %A, %B
	%vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>			%vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
	ret <16 x float> %vecinit2			ret <16 x float> %vecinit2
	}			}

	define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {			define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
	▲ Show 20 Lines • Show All 165 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-512-v16.ll

Show First 20 Lines • Show All 246 Lines • ▼ Show 20 Lines	; ALL-NEXT: retq
%c = load <16 x i32>, <16 x i32>* %b		%c = load <16 x i32>, <16 x i32>* %b
%d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>		%d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
ret <16 x i32> %d		ret <16 x i32> %d
}		}

define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {		define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:		; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
; ALL: # BB#0:		; ALL: # BB#0:
; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = <0,1,2,19,u,u,u,u,u,u,u,u,u,u,u,u>		; ALL-NEXT: movw $8, %ax
; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0		; ALL-NEXT: kmovw %eax, %k1
		; ALL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
		; ALL-NEXT: vmovdqa64 %zmm1, %zmm0
; ALL-NEXT: retq		; ALL-NEXT: retq
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>		%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i32> %c		ret <16 x i32> %c
}		}

define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {		define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:		; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:		; ALL: # BB#0:
▲ Show 20 Lines • Show All 285 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-512-v32.ll

	Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28>			%c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28>
	ret <32 x i16> %c			ret <32 x i16> %c
	}			}

	define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) {			define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) {
	; ALL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:			; ALL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
	; ALL: # BB#0:			; ALL: # BB#0:
	; ALL-NEXT: vmovdqu16 {{.*#+}} zmm2 = [32,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]			; ALL-NEXT: movl $1, %eax
	; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1			; ALL-NEXT: kmovd %eax, %k0
	; ALL-NEXT: vpermt2w %zmm0, %zmm2, %zmm1			; ALL-NEXT: knotd %k0, %k1
				RKSimon (unsubmitted, done): Any idea why this isn't using a blend with zero (_mm512_maskz_mov_epi16)?
				m_zuckerman (author, unsubmitted, not done): The patterns were missing; this was changed in commit 291368.
	; ALL-NEXT: vmovdqa64 %zmm1, %zmm0			; ALL-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
	; ALL-NEXT: retq			; ALL-NEXT: retq
	%shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>			%shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
	ret <32 x i16> %shuffle			ret <32 x i16> %shuffle
	}			}

	define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {			define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
	; ALL-LABEL: insert_dup_mem_v32i16_i32:			; ALL-LABEL: insert_dup_mem_v32i16_i32:
	; ALL: # BB#0:			; ALL: # BB#0:
	▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-512-v8.ll

Show First 20 Lines • Show All 1,173 Lines • ▼ Show 20 Lines	; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>		%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x i64> %shuffle		ret <8 x i64> %shuffle
}		}

define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {		define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
;		;
; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:		; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:
; AVX512F: # BB#0:		; AVX512F: # BB#0:
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]		; AVX512F-NEXT: movb $-86, %al
; AVX512F-NEXT: vpermi2q %zmm0, %zmm1, %zmm2		; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0		; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq		; AVX512F-NEXT: retq
;		;
		;
; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:		; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
; AVX512F-32: # BB#0:		; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]		; AVX512F-32-NEXT: movb $-86, %al
; AVX512F-32-NEXT: vpermi2q %zmm0, %zmm1, %zmm2		; AVX512F-32-NEXT: kmovw %eax, %k1
; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0		; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
; AVX512F-32-NEXT: retl		; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>		%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i64> %shuffle		ret <8 x i64> %shuffle
}		}

define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {		define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
;		;
; AVX512F-LABEL: shuffle_v8i64_08080808:		; AVX512F-LABEL: shuffle_v8i64_08080808:
▲ Show 20 Lines • Show All 1,374 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-to-blend-avx512.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx \| FileCheck %s --check-prefix=SKX
				; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl \| FileCheck %s --check-prefix=KNL

				; Alternating blend of two <64 x i8> vectors: even result lanes come from %W
				; (indices 64, 66, ...), odd result lanes from %A (indices 1, 3, ...).
				; NOTE(review): the SKX sequence loads only a 32-bit immediate (0xAAAAAAAA)
				; before kmovq, so presumably mask bits 32-63 end up zero although the
				; shuffle alternates across all 64 lanes - confirm the mask is not truncated.
				define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
				RKSimonUnsubmitted Not Done Reply Inline Actions Move these into vector-shuffle-avx512.ll ? RKSimon: Move these into vector-shuffle-avx512.ll ?
				; SKX-LABEL: test_mm512_mask_blend_epi8:
				; SKX: # BB#0: # %entry
				; SKX-NEXT: movl $2863311530, %eax # imm = 0xAAAAAAAA
				; SKX-NEXT: kmovq %rax, %k1
				; SKX-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: test_mm512_mask_blend_epi8:
				; KNL: # BB#0: # %entry
				; KNL-NEXT: vpbroadcastw {{.*}}(%rip), %ymm4
				; KNL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
				; KNL-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1
				; KNL-NEXT: retq
				entry:
				%0 = shufflevector <64 x i8> %A, <64 x i8> %W, <64 x i32> <i32 64, i32 1, i32 66, i32 3, i32 68, i32 5, i32 70, i32 7, i32 72, i32 9, i32 74, i32 11, i32 76, i32 13, i32 78, i32 15, i32 80, i32 17, i32 82, i32 19, i32 84, i32 21, i32 86, i32 23, i32 88, i32 25, i32 90, i32 27, i32 92, i32 29, i32 94, i32 31, i32 96, i32 33, i32 98, i32 35, i32 100, i32 37, i32 102, i32 39, i32 104, i32 41, i32 106, i32 43, i32 108, i32 45, i32 110, i32 47, i32 112, i32 49, i32 114, i32 51, i32 116, i32 53, i32 118, i32 55, i32 120, i32 57, i32 122, i32 59, i32 124, i32 61, i32 126, i32 63>
				ret <64 x i8> %0
				}

				; Alternating blend of two <32 x i16> vectors: even result lanes come from %W
				; (indices 32, 34, ...), odd result lanes from %A (indices 1, 3, ...).
				; NOTE(review): the KNL vpblendw checks take the even lanes from ymm2/ymm3,
				; whereas the SKX masked vmovdqu16 (mask 0xAAAAAAAA) writes %zmm1 into the
				; odd lanes. Presumably %W arrives in %zmm1 - confirm the two lowerings
				; select the same lanes.
				define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
				; SKX-LABEL: test_mm512_mask_blend_epi16:
				; SKX: # BB#0: # %entry
				; SKX-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
				; SKX-NEXT: kmovd %eax, %k1
				; SKX-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: test_mm512_mask_blend_epi16:
				; KNL: # BB#0: # %entry
				; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7],ymm2[8],ymm0[9],ymm2[10],ymm0[11],ymm2[12],ymm0[13],ymm2[14],ymm0[15]
				; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm3[0],ymm1[1],ymm3[2],ymm1[3],ymm3[4],ymm1[5],ymm3[6],ymm1[7],ymm3[8],ymm1[9],ymm3[10],ymm1[11],ymm3[12],ymm1[13],ymm3[14],ymm1[15]
				; KNL-NEXT: retq
				entry:
				%0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
				ret <32 x i16> %0
				}

				; Blend of two <16 x i32> vectors expressed as a shufflevector: even result
				; lanes use indices 16, 18, ... (lanes of %W, the second operand) and odd
				; result lanes use indices 1, 3, ... (lanes of %A, the first operand).
				; Both check prefixes expect the same sequence: load the 16-bit constant
				; 0xAAAA into k1 and perform a merge-masked vmovdqa32 of zmm1 into zmm0.
				; NOTE(review): 0xAAAA sets the odd mask bits, so the masked move replaces
				; the odd lanes of zmm0 with zmm1. Assuming %A arrives in zmm0 and %W in
				; zmm1, that yields odd lanes from %W, the opposite of the IR mask -
				; confirm against regenerated llc output.
				define <16 x i32> @test_mm512_mask_blend_epi32(<16 x i32> %A, <16 x i32> %W){
				; SKX-LABEL: test_mm512_mask_blend_epi32:
				; SKX: # BB#0: # %entry
				; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; SKX-NEXT: kmovw %eax, %k1
				; SKX-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: test_mm512_mask_blend_epi32:
				; KNL: # BB#0: # %entry
				; KNL-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
				; KNL-NEXT: retq
				entry:
				%0 = shufflevector <16 x i32> %A, <16 x i32> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
				ret <16 x i32> %0
				}

				; Blend of two <8 x i64> vectors expressed as a shufflevector: even result
				; lanes use indices 8, 10, ... (lanes of %W, the second operand) and odd
				; result lanes use indices 1, 3, ... (lanes of %A, the first operand).
				; Both check prefixes expect the byte constant -86 (bit pattern 0xAA) to be
				; moved into k1, followed by a merge-masked vmovdqa64 of zmm1 into zmm0.
				; The only difference is the k-register move: kmovb in the SKX checks and
				; kmovw in the KNL checks.
				; NOTE(review): 0xAA sets the odd mask bits, so the masked move replaces
				; the odd lanes of zmm0 with zmm1. Assuming %A arrives in zmm0 and %W in
				; zmm1, that yields odd lanes from %W, the opposite of the IR mask -
				; confirm against regenerated llc output.
				define <8 x i64> @test_mm512_mask_blend_epi64(<8 x i64> %A, <8 x i64> %W){
				; SKX-LABEL: test_mm512_mask_blend_epi64:
				; SKX: # BB#0: # %entry
				; SKX-NEXT: movb $-86, %al
				; SKX-NEXT: kmovb %eax, %k1
				; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: test_mm512_mask_blend_epi64:
				; KNL: # BB#0: # %entry
				; KNL-NEXT: movb $-86, %al
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
				; KNL-NEXT: retq
				entry:
				%0 = shufflevector <8 x i64> %A, <8 x i64> %W, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
				ret <8 x i64> %0
				}

				; Blend of two <16 x float> vectors expressed as a shufflevector: even
				; result lanes use indices 16, 18, ... (lanes of %W, the second operand)
				; and odd result lanes use indices 1, 3, ... (lanes of %A, the first
				; operand).
				; Both check prefixes expect the same sequence: load the 16-bit constant
				; 0xAAAA into k1 and perform a merge-masked vmovaps of zmm1 into zmm0.
				; NOTE(review): 0xAAAA sets the odd mask bits, so the masked move replaces
				; the odd lanes of zmm0 with zmm1. Assuming %A arrives in zmm0 and %W in
				; zmm1, that yields odd lanes from %W, the opposite of the IR mask -
				; confirm against regenerated llc output.
				define <16 x float> @test_mm512_mask_blend_ps(<16 x float> %A, <16 x float> %W){
				; SKX-LABEL: test_mm512_mask_blend_ps:
				; SKX: # BB#0: # %entry
				; SKX-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; SKX-NEXT: kmovw %eax, %k1
				; SKX-NEXT: vmovaps %zmm1, %zmm0 {%k1}
				; SKX-NEXT: retq
				;
				; KNL-LABEL: test_mm512_mask_blend_ps:
				; KNL: # BB#0: # %entry
				; KNL-NEXT: movw $-21846, %ax # imm = 0xAAAA
				; KNL-NEXT: kmovw %eax, %k1
				; KNL-NEXT: vmovaps %zmm1, %zmm0 {%k1}
				; KNL-NEXT: retq
				entry:
				%0 = shufflevector <16 x float> %A, <16 x float> %W, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
				ret <16 x float> %0
				}

				; Blend of two <8 x double> vectors expressed as a shufflevector: even
				; result lanes use indices 8, 10, ... (lanes of %W, the second operand) and
				; odd result lanes use indices 1, 3, ... (lanes of %A, the first operand).
				; Unlike the other blend tests in this group, both check prefixes expect a
				; single vshufpd with no mask register: even lanes come from zmm1 and odd
				; lanes from zmm0, which matches the IR mask if %A is in zmm0 and %W is in
				; zmm1 (presumed from the calling convention; not shown in this file).
				define <8 x double> @test_mm512_mask_blend_pd(<8 x double> %A, <8 x double> %W){
				; SKX-LABEL: test_mm512_mask_blend_pd:
				; SKX: # BB#0: # %entry
				; SKX-NEXT: vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
				; SKX-NEXT: retq
				;
				; KNL-LABEL: test_mm512_mask_blend_pd:
				; KNL: # BB#0: # %entry
				; KNL-NEXT: vshufpd {{.*#+}} zmm0 = zmm1[0],zmm0[1],zmm1[2],zmm0[3],zmm1[4],zmm0[5],zmm1[6],zmm0[7]
				; KNL-NEXT: retq
				entry:
				%0 = shufflevector <8 x double> %A, <8 x double> %W, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
				ret <8 x double> %0
				}

This is an archive of the discontinued LLVM Phabricator instance.

[X86][AVX512] Adding missing shuffle lowering to blend mask instructions (VPBLENDMB/VPBLENDMW/VPBLENDMD/VPBLENDMQ).
ClosedPublic

Details

Diff Detail

Event Timeline

Script:

/build/build-ubsan/./bin/llc < /src/test/CodeGen/X86/vector-shuffle-avx512.ll -mtriple=x86_64-pc-linux-gnu -mcpu=knl | /build/build-ubsan/./bin/FileCheck /src/test/CodeGen/X86/vector-shuffle-avx512.ll --check-prefix=KNL

Command Output (stderr):

Revision Contents

Diff 83988

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/merge-consecutive-loads-512.ll

test/CodeGen/X86/sse3-avx-addsub.ll

test/CodeGen/X86/vector-shuffle-512-v16.ll

test/CodeGen/X86/vector-shuffle-512-v32.ll

test/CodeGen/X86/vector-shuffle-512-v8.ll

test/CodeGen/X86/vector-shuffle-to-blend-avx512.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][AVX512] Adding missing shuffle lowering to blend mask instructions (VPBLENDMB/VPBLENDMW/VPBLENDMD/VPBLENDMQ). ClosedPublic

Details

Diff Detail

Event Timeline

Script:

/build/build-ubsan/./bin/llc < /src/test/CodeGen/X86/vector-shuffle-avx512.ll -mtriple=x86_64-pc-linux-gnu -mcpu=knl | /build/build-ubsan/./bin/FileCheck /src/test/CodeGen/X86/vector-shuffle-avx512.ll --check-prefix=KNL

Command Output (stderr):

Revision Contents

Diff 83988

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/merge-consecutive-loads-512.ll

test/CodeGen/X86/sse3-avx-addsub.ll

test/CodeGen/X86/vector-shuffle-512-v16.ll

test/CodeGen/X86/vector-shuffle-512-v32.ll

test/CodeGen/X86/vector-shuffle-512-v8.ll

test/CodeGen/X86/vector-shuffle-to-blend-avx512.ll

[X86][AVX512] Adding missing shuffle lowering to blend mask instructions (VPBLENDMB/VPBLENDMW/VPBLENDMD/VPBLENDMQ).
ClosedPublic