Diff 83963

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 31,214 Lines • ▼ Show 20 Lines	static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
if (!ShiftAmt \|\| ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)		if (!ShiftAmt \|\| ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();		return SDValue();

// Create a greater-than comparison against -1. We don't use the more obvious		// Create a greater-than comparison against -1. We don't use the more obvious
// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.		// greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);		return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}		}

		/// Check whether a truncation with saturation from type \p SrcVT to \p DstVT
		/// is valid for the given \p Subtarget.
		///
		/// \returns true only when all of the following hold:
		///   - \p Subtarget has AVX-512;
		///   - \p SrcVT is a simple vector type of at most 512 bits;
		///   - the source element is 16..64 bits wide and the destination element is
		///     8..32 bits wide;
		///   - \p SrcVT is a 512-bit vector, or \p Subtarget has VLX;
		///   - the source element is at least 32 bits wide, or \p Subtarget has BWI.
		static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
		                                        const X86Subtarget &Subtarget) {
		  if (!Subtarget.hasAVX512())
		    return false;

		  // FIXME: Scalar type may be supported if we move it to vector register.
		  if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
		    return false;

		  // Only the element widths are range-checked here; the relation between the
		  // two types (element count, narrowing) is not checked in this function.
		  EVT SrcElVT = SrcVT.getScalarType();
		  EVT DstElVT = DstVT.getScalarType();
		  if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
		    return false;
		  if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
		    return false;

		  // Sub-512-bit sources additionally need VLX; 16-bit source elements
		  // additionally need BWI.
		  if (SrcVT.is512BitVector() || Subtarget.hasVLX())
		    return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
		  return false;
		}

		/// Return true if a VPACK* instruction can be used to narrow \p SrcVT to
		/// \p DstVT and that instruction is available on \p Subtarget.
		///
		/// Exactly two shapes are accepted:
		///   v16i16 -> v16i8, which needs SSE2;
		///   v8i32  -> v8i16, which needs SSE4.1.
		static bool
		isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
		  const bool IsWordToByte = SrcVT == MVT::v16i16 && DstVT == MVT::v16i8;
		  const bool IsDWordToWord = SrcVT == MVT::v8i32 && DstVT == MVT::v8i16;
		  return (IsWordToByte && Subtarget.hasSSE2()) ||
		         (IsDWordToWord && Subtarget.hasSSE41());
		}

		/// Match the operand of a truncate-with-unsigned-saturation idiom:
		///   (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
		/// \p In is the value being truncated and \p VT is the truncation result type.
		/// \returns x (operand 0 of the UMIN) on a match, or an empty SDValue()
		/// otherwise.
		static SDValue detectUSatPattern(SDValue In, EVT VT) {
		  if (In.getOpcode() != ISD::UMIN)
		    return SDValue();

		  // The caller truncates from In's type to VT, so In must have wider elements.
		  assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
		         "Unexpected types for truncate operation");

		  // Only a constant splat on operand 1 of the UMIN is recognised.
		  APInt SplatVal;
		  if (!ISD::isConstantSplatVector(In.getOperand(1).getNode(), SplatVal))
		    return SDValue();

		  // The splat must be the all-ones mask of the destination element width,
		  // i.e. UINT32_MAX / UINT16_MAX / UINT8_MAX depending on VT.
		  if (!APIntOps::isMask(VT.getScalarSizeInBits(), SplatVal))
		    return SDValue();

		  return In.getOperand(0);
		}

		/// AVX-512 flavour of the unsigned-saturating truncate matcher:
		///   (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
		/// The match is attempted only when the type of \p In and \p VT pass
		/// isSATValidOnAVX512Subtarget(), i.e. when a VPMOVUS* instruction can be used.
		/// \returns the source value to be truncated, or an empty SDValue() when the
		/// types are rejected or the pattern is not matched.
		static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
		                                       const X86Subtarget &Subtarget) {
		  const bool TypesAreValid =
		      isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget);
		  return TypesAreValid ? detectUSatPattern(In, VT) : SDValue();
		}

		/// Try to replace a truncate of \p In to \p VT, where \p In has the form
		/// (umin x, unsigned_max_of_dest_type), with a saturating narrowing node.
		///
		/// \param In        value being truncated.
		/// \param VT        result type of the truncate.
		/// \param DL        debug location used for the new nodes.
		/// \param DAG       DAG in which the new nodes are created.
		/// \param Subtarget used to decide which lowering, if any, is available.
		/// \returns an X86ISD::VTRUNCUS node when the AVX-512 type check passes, an
		/// X86ISD::PACKUS node when the SSE type check passes, or an empty SDValue()
		/// when the pattern does not match or neither lowering applies.
		static SDValue
		combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
		                        const X86Subtarget &Subtarget) {
		  SDValue USatVal = detectUSatPattern(In, VT);
		  if (!USatVal)
		    return SDValue();

		  // VTRUNCUS performs the unsigned saturation itself, so it takes the
		  // unclamped UMIN operand and the UMIN becomes redundant.
		  if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
		    return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);

		  if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
		    // PACKUSWB/PACKUSDW treat their inputs as *signed* and saturate to the
		    // unsigned range, so an unclamped lane with its sign bit set would pack
		    // to 0 instead of the unsigned maximum. Pack the clamped value (In, the
		    // UMIN result) rather than USatVal: after the UMIN every lane already
		    // fits in the destination element, so PACKUS narrows it exactly.
		    SDValue Lo, Hi;
		    std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
		    return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
		  }
		  return SDValue();
		}

/// This function detects the AVG pattern between vectors of unsigned i8/i16,		/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient		/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.		/// X86ISD::AVG instruction.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,		static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
const X86Subtarget &Subtarget,		const X86Subtarget &Subtarget,
const SDLoc &DL) {		const SDLoc &DL) {
if (!VT.isVector() \|\| !VT.isSimple())		if (!VT.isVector() \|\| !VT.isSimple())
return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 550 Lines • ▼ Show 20 Lines	if (St->isTruncatingStore() && VT.isVector()) {
// replace the trunc store by a normal store with the result of X86ISD::AVG		// replace the trunc store by a normal store with the result of X86ISD::AVG
// instruction.		// instruction.
if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,		if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
Subtarget, dl))		Subtarget, dl))
return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),		return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
St->getPointerInfo(), St->getAlignment(),		St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());		St->getMemOperand()->getFlags());

		if (SDValue Val =
		detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
		return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
		dl, Val, St->getBasePtr(),
		St->getMemoryVT(), St->getMemOperand(), DAG);

const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();		unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");		assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();		unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();		unsigned ToSz = StVT.getScalarSizeInBits();

// The truncating store is legal in some cases. For example		// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw		// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
▲ Show 20 Lines • Show All 604 Lines • ▼ Show 20 Lines	static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
// Attempt to pre-truncate inputs to arithmetic ops instead.		// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))		if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;		return V;

// Try to detect AVG pattern first.		// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))		if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;		return Avg;

		// Try to combine truncation with unsigned saturation.
		if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
		return Val;

// The bitcast source is a direct mmx result.		// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx		// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {		if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
SDValue BCSrc = Src.getOperand(0);		SDValue BCSrc = Src.getOperand(0);
if (BCSrc.getValueType() == MVT::x86mmx)		if (BCSrc.getValueType() == MVT::x86mmx)
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);		return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}		}

▲ Show 20 Lines • Show All 2,411 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/avx-trunc.ll

	Show All 33 Lines
	; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1			; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
	; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0			; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
	; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]			; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
	; CHECK-NEXT: vzeroupper			; CHECK-NEXT: vzeroupper
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%B = trunc <16 x i16> %A to <16 x i8>			%B = trunc <16 x i16> %A to <16 x i8>
	ret <16 x i8> %B			ret <16 x i8> %B
	}			}

				; Unsigned-saturating truncate <16 x i16> -> <16 x i8>: the icmp ult/select
				; pair clamps every lane to 255 (an unsigned min) before the trunc.
				; NOTE(review): the expected code contains no vpminuw, and vpackuswb
				; saturates its inputs as signed words, so lanes >= 0x8000 would pack to 0
				; rather than 255 -- confirm this expectation is really intended.
				define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
				; CHECK-LABEL: usat_trunc_wb_256:
				; CHECK: # BB#0:
				; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
				; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: vzeroupper
				; CHECK-NEXT: retq
				%x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x6 = trunc <16 x i16> %x5 to <16 x i8>
				ret <16 x i8> %x6
				}

				; Unsigned-saturating truncate <8 x i32> -> <8 x i16>: the icmp ult/select
				; pair clamps every lane to 65535 (an unsigned min) before the trunc.
				; NOTE(review): the expected code contains no vpminud, and vpackusdw
				; saturates its inputs as signed dwords, so lanes >= 0x80000000 would pack
				; to 0 rather than 65535 -- confirm this expectation is really intended.
				define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
				; CHECK-LABEL: usat_trunc_dw_256:
				; CHECK: # BB#0:
				; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
				; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
				; CHECK-NEXT: vzeroupper
				; CHECK-NEXT: retq
				%x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
				%x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
				%x6 = trunc <8 x i32> %x5 to <8 x i16>
				ret <8 x i16> %x6
				}

llvm/trunk/test/CodeGen/X86/avx512-trunc.ll

	Show First 20 Lines • Show All 494 Lines • ▼ Show 20 Lines
	; SKX-LABEL: trunc_wb_128_mem:			; SKX-LABEL: trunc_wb_128_mem:
	; SKX: ## BB#0:			; SKX: ## BB#0:
	; SKX-NEXT: vpmovwb %xmm0, (%rdi)			; SKX-NEXT: vpmovwb %xmm0, (%rdi)
	; SKX-NEXT: retq			; SKX-NEXT: retq
	%x = trunc <8 x i16> %i to <8 x i8>			%x = trunc <8 x i16> %i to <8 x i8>
	store <8 x i8> %x, <8 x i8>* %res			store <8 x i8> %x, <8 x i8>* %res
	ret void			ret void
	}			}


				; Unsigned-saturating truncate <16 x i16> -> <16 x i8> (clamp to 255 via
				; icmp ult/select, then trunc) whose result is stored to %res.
				; The SKX expectation is a single vpmovuswb store; the KNL expectation packs
				; the two 128-bit halves with vpackuswb and stores the result separately.
				; NOTE(review): the KNL sequence has no vpminuw before the signed-input
				; vpackuswb -- confirm lanes >= 0x8000 are handled as intended.
				define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
				; KNL-LABEL: usat_trunc_wb_256_mem:
				; KNL: ## BB#0:
				; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
				; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
				; KNL-NEXT: vmovdqu %xmm0, (%rdi)
				; KNL-NEXT: retq
				;
				; SKX-LABEL: usat_trunc_wb_256_mem:
				; SKX: ## BB#0:
				; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
				; SKX-NEXT: retq
				%x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x6 = trunc <16 x i16> %x5 to <16 x i8>
				store <16 x i8> %x6, <16 x i8>* %res, align 1
				ret void
				}

				; Unsigned-saturating truncate <16 x i16> -> <16 x i8> (clamp to 255 via
				; icmp ult/select, then trunc), returned in a register.
				; The SKX expectation is a single vpmovuswb; the KNL expectation packs the
				; two 128-bit halves with vpackuswb.
				; NOTE(review): the KNL sequence has no vpminuw before the signed-input
				; vpackuswb -- confirm lanes >= 0x8000 are handled as intended.
				define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
				; KNL-LABEL: usat_trunc_wb_256:
				; KNL: ## BB#0:
				; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
				; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
				; KNL-NEXT: retq
				;
				; SKX-LABEL: usat_trunc_wb_256:
				; SKX: ## BB#0:
				; SKX-NEXT: vpmovuswb %ymm0, %xmm0
				; SKX-NEXT: retq
				%x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x6 = trunc <16 x i16> %x5 to <16 x i8>
				ret <16 x i8> %x6
				}

				; Unsigned-saturating truncate <8 x i16> -> <8 x i8> (clamp to 255 via
				; icmp ult/select, then trunc) whose result is stored to %res.
				; The SKX expectation is a single vpmovuswb store; the KNL expectation keeps
				; the explicit vpminuw clamp and narrows with a vpshufb byte shuffle.
				define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
				; KNL-LABEL: usat_trunc_wb_128_mem:
				; KNL: ## BB#0:
				; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
				; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
				; KNL-NEXT: vmovq %xmm0, (%rdi)
				; KNL-NEXT: retq
				;
				; SKX-LABEL: usat_trunc_wb_128_mem:
				; SKX: ## BB#0:
				; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
				; SKX-NEXT: retq
				%x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x6 = trunc <8 x i16> %x5 to <8 x i8>
				store <8 x i8> %x6, <8 x i8>* %res, align 1
				ret void
				}

				; Unsigned-saturating truncate <16 x i32> -> <16 x i8> (clamp to 255 via
				; icmp ult/select, then trunc) whose result is stored to %res.
				; The shared ALL check prefix expects a single vpmovusdb store.
				define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
				; ALL-LABEL: usat_trunc_db_512_mem:
				; ALL: ## BB#0:
				; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
				; ALL-NEXT: retq
				%x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
				%x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
				%x6 = trunc <16 x i32> %x5 to <16 x i8>
				store <16 x i8> %x6, <16 x i8>* %res, align 1
				ret void
				}

				; Unsigned-saturating truncate <8 x i64> -> <8 x i8> (clamp to 255 via
				; icmp ult/select, then trunc) whose result is stored to %res.
				; The shared ALL check prefix expects a single vpmovusqb store.
				define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
				; ALL-LABEL: usat_trunc_qb_512_mem:
				; ALL: ## BB#0:
				; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
				; ALL-NEXT: retq
				%x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
				%x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
				%x6 = trunc <8 x i64> %x5 to <8 x i8>
				store <8 x i8> %x6, <8 x i8>* %res, align 1
				ret void
				}

				; Unsigned-saturating truncate <8 x i64> -> <8 x i32> (clamp to 4294967295
				; via icmp ult/select, then trunc) whose result is stored to %res.
				; The shared ALL check prefix expects a single vpmovusqd store.
				define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
				; ALL-LABEL: usat_trunc_qd_512_mem:
				; ALL: ## BB#0:
				; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
				; ALL-NEXT: retq
				%x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
				%x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
				%x6 = trunc <8 x i64> %x5 to <8 x i32>
				store <8 x i32> %x6, <8 x i32>* %res, align 1
				ret void
				}

				; Unsigned-saturating truncate <8 x i64> -> <8 x i16> (clamp to 65535 via
				; icmp ult/select, then trunc) whose result is stored to %res.
				; The shared ALL check prefix expects a single vpmovusqw store.
				define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
				; ALL-LABEL: usat_trunc_qw_512_mem:
				; ALL: ## BB#0:
				; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
				; ALL-NEXT: retq
				%x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
				%x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
				%x6 = trunc <8 x i64> %x5 to <8 x i16>
				store <8 x i16> %x6, <8 x i16>* %res, align 1
				ret void
				}

				; Unsigned-saturating truncate <32 x i32> -> <32 x i8> (clamp to 255 via
				; icmp ult/select, then trunc); the source is wider than one 512-bit register.
				; The KNL expectation uses one vpmovusdb per 512-bit half and joins the
				; results. The SKX expectation keeps explicit vpminud clamps and narrows in
				; two plain steps (vpmovdw, then vpmovwb) without any saturating truncate.
				; NOTE(review): the SKX sequence is longer than the KNL one -- looks like a
				; missed optimization; confirm.
				define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
				; KNL-LABEL: usat_trunc_db_1024:
				; KNL: ## BB#0:
				; KNL-NEXT: vpmovusdb %zmm0, %xmm0
				; KNL-NEXT: vpmovusdb %zmm1, %xmm1
				; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
				; KNL-NEXT: retq
				;
				; SKX-LABEL: usat_trunc_db_1024:
				; SKX: ## BB#0:
				; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
				; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
				; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
				; SKX-NEXT: vpmovdw %zmm0, %ymm0
				; SKX-NEXT: vpmovdw %zmm1, %ymm1
				; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
				; SKX-NEXT: vpmovwb %zmm0, %ymm0
				; SKX-NEXT: retq
				%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
				%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
				%x6 = trunc <32 x i32> %x5 to <32 x i8>
				ret <32 x i8> %x6
				}

				; Unsigned-saturating truncate <32 x i32> -> <32 x i8> (clamp to 255 via
				; icmp ult/select, then trunc) whose result is stored to %p.
				; The KNL expectation uses one vpmovusdb per 512-bit half, joins the results
				; and stores them. The SKX expectation keeps explicit vpminud clamps and
				; narrows with vpmovdw followed by a truncating vpmovwb store.
				; NOTE(review): the SKX sequence does not use a saturating truncate -- looks
				; like a missed optimization; confirm.
				define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
				; KNL-LABEL: usat_trunc_db_1024_mem:
				; KNL: ## BB#0:
				; KNL-NEXT: vpmovusdb %zmm0, %xmm0
				; KNL-NEXT: vpmovusdb %zmm1, %xmm1
				; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
				; KNL-NEXT: vmovdqu %ymm0, (%rdi)
				; KNL-NEXT: retq
				;
				; SKX-LABEL: usat_trunc_db_1024_mem:
				; SKX: ## BB#0:
				; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
				; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
				; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
				; SKX-NEXT: vpmovdw %zmm0, %ymm0
				; SKX-NEXT: vpmovdw %zmm1, %ymm1
				; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
				; SKX-NEXT: vpmovwb %zmm0, (%rdi)
				; SKX-NEXT: retq
				%x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
				%x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
				%x6 = trunc <32 x i32> %x5 to <32 x i8>
				store <32 x i8>%x6, <32 x i8>* %p, align 1
				ret void
				}

				; Unsigned-saturating truncate <16 x i32> -> <16 x i16> (clamp to 65535 via
				; icmp ult/select, then trunc), returned in a register.
				; The shared ALL check prefix expects a single vpmovusdw.
				define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
				; ALL-LABEL: usat_trunc_dw_512:
				; ALL: ## BB#0:
				; ALL-NEXT: vpmovusdw %zmm0, %ymm0
				; ALL-NEXT: retq
				%x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
				%x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
				%x6 = trunc <16 x i32> %x5 to <16 x i16>
				ret <16 x i16> %x6
				}

				; Unsigned-saturating truncate <8 x i16> -> <8 x i8> (clamp to 255 via
				; icmp ult/select, then trunc), returned by value.
				; The shared ALL check prefix expects only the vpminuw clamp, with no
				; narrowing instruction -- presumably because the <8 x i8> return value is
				; carried in widened form; TODO confirm.
				define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
				; ALL-LABEL: usat_trunc_wb_128:
				; ALL: ## BB#0:
				; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
				; ALL-NEXT: retq
				%x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
				%x6 = trunc <8 x i16> %x5 to <8 x i8>
				ret <8 x i8>%x6
				}

				; Unsigned-saturating truncate <16 x i64> -> <16 x i16> (clamp to 65535 via
				; icmp ult/select, then trunc); the source is wider than one 512-bit register.
				; Both the KNL and SKX expectations keep explicit vpminuq clamps and narrow
				; in two plain steps (vpmovqd, then vpmovdw); they differ only in the insert
				; instruction that joins the halves (vinserti64x4 vs. vinserti32x8).
				; NOTE(review): no saturating truncate (vpmovusqw) is expected here -- looks
				; like a missed optimization; confirm.
				define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
				; KNL-LABEL: usat_trunc_qw_1024:
				; KNL: ## BB#0:
				; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
				; KNL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
				; KNL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
				; KNL-NEXT: vpmovqd %zmm0, %ymm0
				; KNL-NEXT: vpmovqd %zmm1, %ymm1
				; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
				; KNL-NEXT: vpmovdw %zmm0, %ymm0
				; KNL-NEXT: retq
				;
				; SKX-LABEL: usat_trunc_qw_1024:
				; SKX: ## BB#0:
				; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
				; SKX-NEXT: vpminuq %zmm2, %zmm1, %zmm1
				; SKX-NEXT: vpminuq %zmm2, %zmm0, %zmm0
				; SKX-NEXT: vpmovqd %zmm0, %ymm0
				; SKX-NEXT: vpmovqd %zmm1, %ymm1
				; SKX-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
				; SKX-NEXT: vpmovdw %zmm0, %ymm0
				; SKX-NEXT: retq
				%x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
				%x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
				%x6 = trunc <16 x i64> %x5 to <16 x i16>
				ret <16 x i16> %x6
				}

This is an archive of the discontinued LLVM Phabricator instance.

AVX-512: Truncate with unsigned saturation.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 83963

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/avx-trunc.ll

llvm/trunk/test/CodeGen/X86/avx512-trunc.ll

This is an archive of the discontinued LLVM Phabricator instance.

AVX-512: Truncate with unsigned saturation.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 83963

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/test/CodeGen/X86/avx-trunc.ll

llvm/trunk/test/CodeGen/X86/avx512-trunc.ll

AVX-512: Truncate with unsigned saturation.
ClosedPublic