Diff 381247

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,621 Lines • ▼ Show 20 Lines	static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
SDValue Mask = DAG.getConstant(		SDValue Mask = DAG.getConstant(
APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);		APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);		SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
SDValue Zero = DAG.getConstant(0, DL, VT);		SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);		SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
return DAG.getZExtOrTrunc(Setcc, DL, VT);		return DAG.getZExtOrTrunc(Setcc, DL, VT);
}		}

		/// For targets that support usubsat, match a bit-hack form of that operation
		/// that ends in 'and' and convert it.
		static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
		SDValue N0 = N->getOperand(0);
		SDValue N1 = N->getOperand(1);
		EVT VT = N1.getValueType();

		// Canonicalize xor as operand 0.
		if (N1.getOpcode() == ISD::XOR)
		std::swap(N0, N1);

		if (N0.getOpcode() != ISD::XOR \|\| N1.getOpcode() != ISD::SRA \|\|
		!N0.hasOneUse() \|\| !N1.hasOneUse() \|\|
		N0.getOperand(0) != N1.getOperand(0))
		return SDValue();

		unsigned BitWidth = VT.getScalarSizeInBits();
		ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
		ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
		if (!XorC \|\| !XorC->getAPIntValue().isSignMask() \|\|
		!SraC \|\| SraC->getAPIntValue() != BitWidth - 1)
		return SDValue();

		// (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
		foad (Unsubmitted, Not Done): What about the same thing with X +/- 128 instead of X ^ 128? Do they already get canonicalized to the XOR version?
		RKSimon (Unsubmitted, Not Done): I think instcombine will have canonicalized them: https://alive2.llvm.org/ce/z/n2TfRo
		spatel (Author, Unsubmitted, Done): Yes, instcombine will convert the math op to logic op. But this is an interesting question for codegen because one form may be better than the other depending on target. For example, on x86 we get these variants:
		    leal -2147483648(%rdi), %eax
		vs.
		    movl %edi, %eax
		    xorl $-2147483648, %eax ## imm = 0x80000000
		...while on aarch64:
		    mov w8, #-2147483648
		    add w0, w0, w8
		vs.
		    eor w0, w0, #0x80000000
		Also, this patch is part of a chain that eventually leads back to a question about whether instcombine is canonicalizing or even can (in case of extra uses) canonicalize to a form that is consistent (we do form usubsat in IR sometimes, but we're missing patterns as shown in this patch). So I think it's not a stretch to make the match more flexible in this fold; that gives us some protection in case code outside of here decides to do things differently in the future.
		SDLoc DL(N);
		SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
		return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
		}

SDValue DAGCombiner::visitAND(SDNode *N) {		SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);		SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);		SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();		EVT VT = N1.getValueType();

// x & x --> x		// x & x --> x
if (N0 == N1)		if (N0 == N1)
return N0;		return N0;
▲ Show 20 Lines • Show All 346 Lines • ▼ Show 20 Lines	auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {

return true;		return true;
};		};

// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).		// Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
if (IsAndZeroExtMask(N0, N1))		if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));		return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));

		if (hasOperation(ISD::USUBSAT, VT))
		RKSimon (Unsubmitted, Done): If we could use hasOperation then AVX1 should be able to match this for 256-bit integers as well.
		spatel (Author, Unsubmitted, Done): Yes - that's how I had drafted this initially, but I noticed potential regressions with extra uses. But extra-use patterns on something like this might be too rare to worry about. I'll add some more tests and update.
		if (SDValue V = foldAndToUsubsat(N, DAG))
		return V;

return SDValue();		return SDValue();
}		}

/// Match (a >> 8) \| (a << 8) as (bswap a) >> 16.		/// Match (a >> 8) \| (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,		SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits) {		bool DemandHighBits) {
if (!LegalOperations)		if (!LegalOperations)
return SDValue();		return SDValue();
▲ Show 20 Lines • Show All 17,673 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/usubsat.ll

	Show First 20 Lines • Show All 80 Lines • ▼ Show 20 Lines
	; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1			; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
	; GFX6-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0			; GFX6-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
	; GFX6-NEXT: v_and_b32_e32 v0, v1, v0			; GFX6-NEXT: v_and_b32_e32 v0, v1, v0
	; GFX6-NEXT: s_setpc_b64 s[30:31]			; GFX6-NEXT: s_setpc_b64 s[30:31]
	;			;
	; GFX8-LABEL: usubsat_as_bithack_i16:			; GFX8-LABEL: usubsat_as_bithack_i16:
	; GFX8: ; %bb.0:			; GFX8: ; %bb.0:
	; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)			; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0			; GFX8-NEXT: s_movk_i32 s4, 0x8000
	; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0			; GFX8-NEXT: v_sub_u16_e64 v0, v0, s4 clamp
	; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
	; GFX8-NEXT: s_setpc_b64 s[30:31]			; GFX8-NEXT: s_setpc_b64 s[30:31]
	;			;
	; GFX9-LABEL: usubsat_as_bithack_i16:			; GFX9-LABEL: usubsat_as_bithack_i16:
	; GFX9: ; %bb.0:			; GFX9: ; %bb.0:
	; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)			; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GFX9-NEXT: v_ashrrev_i16_e32 v1, 15, v0			; GFX9-NEXT: s_movk_i32 s4, 0x8000
	; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0			; GFX9-NEXT: v_sub_u16_e64 v0, v0, s4 clamp
	; GFX9-NEXT: v_and_b32_e32 v0, v1, v0
	; GFX9-NEXT: s_setpc_b64 s[30:31]			; GFX9-NEXT: s_setpc_b64 s[30:31]
	;			;
	; GFX10-LABEL: usubsat_as_bithack_i16:			; GFX10-LABEL: usubsat_as_bithack_i16:
	; GFX10: ; %bb.0:			; GFX10: ; %bb.0:
	; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)			; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
	; GFX10-NEXT: s_waitcnt_vscnt null, 0x0			; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
	; GFX10-NEXT: v_ashrrev_i16 v1, 15, v0			; GFX10-NEXT: v_sub_nc_u16 v0, v0, 0x8000 clamp
	; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
	; GFX10-NEXT: v_and_b32_e32 v0, v1, v0
	; GFX10-NEXT: s_setpc_b64 s[30:31]			; GFX10-NEXT: s_setpc_b64 s[30:31]
	%signsplat = ashr i16 %x, 15			%signsplat = ashr i16 %x, 15
	%flipsign = xor i16 %x, 32768			%flipsign = xor i16 %x, 32768
	%result = and i16 %signsplat, %flipsign			%result = and i16 %signsplat, %flipsign
	ret i16 %result			ret i16 %result
	}			}

	define i32 @v_usubsat_i32(i32 %lhs, i32 %rhs) {			define i32 @v_usubsat_i32(i32 %lhs, i32 %rhs) {
	▲ Show 20 Lines • Show All 527 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/psubus.ll

Show All 28 Lines
}		}

; This is logically equivalent to the above.		; This is logically equivalent to the above.
; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))		; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))

define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {		define <8 x i16> @ashr_xor_and(<8 x i16> %x) nounwind {
; SSE-LABEL: ashr_xor_and:		; SSE-LABEL: ashr_xor_and:
; SSE: # %bb.0:		; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1		; SSE-NEXT: psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX1-LABEL: ashr_xor_and:		; AVX-LABEL: ashr_xor_and:
; AVX1: # %bb.0:		; AVX: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1		; AVX-NEXT: vpsubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0		; AVX-NEXT: retq
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ashr_xor_and:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ashr_xor_and:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX512-NEXT: vpternlogq $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: retq
%signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>		%signsplat = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%flipsign = xor <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>		%flipsign = xor <8 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
%res = and <8 x i16> %signsplat, %flipsign		%res = and <8 x i16> %signsplat, %flipsign
ret <8 x i16> %res		ret <8 x i16> %res
}		}

		; negative test - extra uses may lead to extra instructions when custom-lowered

define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {		define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x i8>* %p2) nounwind {
; SSE-LABEL: ashr_xor_and_commute_uses:		; SSE-LABEL: ashr_xor_and_commute_uses:
; SSE: # %bb.0:		; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1		; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpgtb %xmm0, %xmm1		; SSE-NEXT: pcmpgtb %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, (%rdi)		; SSE-NEXT: movdqa %xmm1, (%rdi)
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0		; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movdqa %xmm0, (%rsi)		; SSE-NEXT: movdqa %xmm0, (%rsi)
Show All 13 Lines	; AVX-NEXT: retq
store <16 x i8> %signsplat, <16 x i8>* %p1		store <16 x i8> %signsplat, <16 x i8>* %p1
%flipsign = xor <16 x i8> %x, <i8 undef, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>		%flipsign = xor <16 x i8> %x, <i8 undef, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
store <16 x i8> %flipsign, <16 x i8>* %p2		store <16 x i8> %flipsign, <16 x i8>* %p2
%res = and <16 x i8> %flipsign, %signsplat		%res = and <16 x i8> %flipsign, %signsplat
ret <16 x i8> %res		ret <16 x i8> %res
}		}

define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {		define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
; SSE-LABEL: ashr_xor_and_custom:		; SSE2OR3-LABEL: ashr_xor_and_custom:
; SSE: # %bb.0:		; SSE2OR3: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1		; SSE2OR3-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1		; SSE2OR3-NEXT: psrad $31, %xmm1
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0		; SSE2OR3-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pand %xmm1, %xmm0		; SSE2OR3-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq		; SSE2OR3-NEXT: retq
		;
		; SSE41-LABEL: ashr_xor_and_custom:
		; SSE41: # %bb.0:
		; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
		; SSE41-NEXT: pmaxud %xmm1, %xmm0
		; SSE41-NEXT: psubd %xmm1, %xmm0
		; SSE41-NEXT: retq
;		;
; AVX1-LABEL: ashr_xor_and_custom:		; AVX1-LABEL: ashr_xor_and_custom:
; AVX1: # %bb.0:		; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1		; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0		; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0		; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: ashr_xor_and_custom:		; AVX2-LABEL: ashr_xor_and_custom:
; AVX2: # %bb.0:		; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1		; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]		; AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0		; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
;		;
; AVX512-LABEL: ashr_xor_and_custom:		; AVX512-LABEL: ashr_xor_and_custom:
; AVX512: # %bb.0:		; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1		; AVX512-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX512-NEXT: vpternlogd $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0		; AVX512-NEXT: vpternlogd $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; AVX512-NEXT: retq		; AVX512-NEXT: retq
%signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>		%signsplat = ashr <4 x i32> %x, <i32 undef, i32 31, i32 31, i32 31>
		RKSimon (Unsubmitted, Not Done): We have a couple of 'UseVPTERNLOG' style checks in code already - you can add that to the D112095 special case to fix this.
%flipsign = xor <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>		%flipsign = xor <4 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
%res = and <4 x i32> %flipsign, %signsplat		%res = and <4 x i32> %flipsign, %signsplat
ret <4 x i32> %res		ret <4 x i32> %res
}		}

; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))		; usubsat X, (1 << (BW-1)) <--> (X ^ (1 << (BW-1))) & (ashr X, (BW-1))

define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {		define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
▲ Show 20 Lines • Show All 210 Lines • ▼ Show 20 Lines	vector.ph:
%1 = xor <16 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>		%1 = xor <16 x i16> %x, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
%res = select <16 x i1> %0, <16 x i16> %1, <16 x i16> zeroinitializer		%res = select <16 x i1> %0, <16 x i16> %1, <16 x i16> zeroinitializer
ret <16 x i16> %res		ret <16 x i16> %res
}		}

define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {		define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
; SSE-LABEL: ashr_xor_and_v16i16:		; SSE-LABEL: ashr_xor_and_v16i16:
; SSE: # %bb.0:		; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm1, %xmm2		; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE-NEXT: psraw $15, %xmm2		; SSE-NEXT: psubusw %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm3		; SSE-NEXT: psubusw %xmm2, %xmm1
; SSE-NEXT: psraw $15, %xmm3
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pand %xmm3, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX1-LABEL: ashr_xor_and_v16i16:		; AVX1-LABEL: ashr_xor_and_v16i16:
; AVX1: # %bb.0:		; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1		; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2		; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2		; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1		; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0		; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq		; AVX1-NEXT: retq
;		;
; AVX2-LABEL: ashr_xor_and_v16i16:		; AVX2-LABEL: ashr_xor_and_v16i16:
; AVX2: # %bb.0:		; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1		; AVX2-NEXT: vpsubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq		; AVX2-NEXT: retq
;		;
; AVX512-LABEL: ashr_xor_and_v16i16:		; AVX512-LABEL: ashr_xor_and_v16i16:
; AVX512: # %bb.0:		; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm1		; AVX512-NEXT: vpsubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpternlogq $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; AVX512-NEXT: retq		; AVX512-NEXT: retq
%signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>		%signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%flipsign = xor <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>		%flipsign = xor <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
%res = and <16 x i16> %signsplat, %flipsign		%res = and <16 x i16> %signsplat, %flipsign
ret <16 x i16> %res		ret <16 x i16> %res
}		}

define <16 x i16> @test8(<16 x i16> %x) nounwind {		define <16 x i16> @test8(<16 x i16> %x) nounwind {
▲ Show 20 Lines • Show All 2,683 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner] fold bit-hack form of usubsat
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 381247

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AMDGPU/usubsat.ll

llvm/test/CodeGen/X86/psubus.ll

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner] fold bit-hack form of usubsat
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 381247

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/test/CodeGen/AMDGPU/usubsat.ll

llvm/test/CodeGen/X86/psubus.ll

[DAGCombiner] fold bit-hack form of usubsat
ClosedPublic