This is an archive of the discontinued LLVM Phabricator instance.

lib/Target/X86/X86ISelLowering.cpp
31745	I think vector types always use combineANDXORWithAllOnesIntoANDNP() for this transform because we peek through the bitcasts to find the integer logic ops for vectors. For scalars, we transform to the X86-specific FP-logic nodes, so that's why we need a separate way to handle them. I'm not sure if that's necessary, but we had load folding bugs when we tried to handle vectors and scalars together. So this example already works without this patch:

define <2 x double> @FsANDNPSrr(<2 x double> %x, <2 x double> %y) {
  %bc1 = bitcast <2 x double> %x to <2 x i64>
  %bc2 = bitcast <2 x double> %y to <2 x i64>
  %not = xor <2 x i64> %bc2, <i64 -1, i64 -1>
  %and = and <2 x i64> %bc1, %not
  %bc3 = bitcast <2 x i64> %and to <2 x double>
  ret <2 x double> %bc3
}

$ ./llc -o - andn.ll
andnps %xmm0, %xmm1
movaps %xmm1, %xmm0
retq

delena accepted this revision. Dec 4 2016, 3:41 AM

delena edited edge metadata.

This revision is now accepted and ready to land. Dec 4 2016, 3:41 AM

Closed by commit rL288675: [x86] fold fand (fxor X, -1) Y --> fandn X, Y (authored by spatel). · Explain Why · Dec 5 2016, 7:55 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Target/

X86/

X86ISelLowering.cpp

30 lines

test/

CodeGen/

X86/

fp-logic-replace.ll

9 lines

Diff 80180

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 31,729 Lines • ▼ Show 20 Lines	if (!isNullFPScalarOrVectorConst(V))
return SDValue();		return SDValue();

if (V.getValueType().isVector())		if (V.getValueType().isVector())
return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));		return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));

return V;		return V;
}		}

		static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
		const X86Subtarget &Subtarget) {
		SDValue N0 = N->getOperand(0);
		SDValue N1 = N->getOperand(1);
		EVT VT = N->getValueType(0);
		SDLoc DL(N);

		if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
		delenaUnsubmitted Not Done Reply Inline Actions It should work for scalar and vector types, right? You check only scalar VT (f32, f64) here. delena: It should work for scalar and vector types, right? You check only scalar VT (f32, f64) here.
		spatelAuthorUnsubmitted Not Done Reply Inline Actions I think vector types always use combineANDXORWithAllOnesIntoANDNP() for this transform because we peek through the bitcasts to find the integer logic ops for vectors. For scalars, we transform to the X86-specific FP-logic nodes, so that's why we need a separate way to handle them. I'm not sure if that's necessary, but we had load folding bugs when we tried to handle vectors and scalars together. So this example already works without this patch: define <2 x double> @FsANDNPSrr(<2 x double> %x, <2 x double> %y) { %bc1 = bitcast <2 x double> %x to <2 x i64> %bc2 = bitcast <2 x double> %y to <2 x i64> %not = xor <2 x i64> %bc2, <i64 -1, i64 -1> %and = and <2 x i64> %bc1, %not %bc3 = bitcast <2 x i64> %and to <2 x double> ret <2 x double> %bc3 } $ ./llc -o - andn.ll andnps %xmm0, %xmm1 movaps %xmm1, %xmm0 retq spatel: I think vector types always use combineANDXORWithAllOnesIntoANDNP() for this transform because…
		(VT == MVT::f64 && Subtarget.hasSSE2())))
		return SDValue();

		auto isAllOnesConstantFP = [](SDValue V) {
		auto *C = dyn_cast<ConstantFPSDNode>(V);
		return C && C->getConstantFPValue()->isAllOnesValue();
		};

		// fand (fxor X, -1), Y --> fandn X, Y
		if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1)))
		return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1);

		// fand X, (fxor Y, -1) --> fandn Y, X
		if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1)))
		return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0);

		return SDValue();
		}

/// Do target-specific dag combines on X86ISD::FAND nodes.		/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,		static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
// FAND(0.0, x) -> 0.0		// FAND(0.0, x) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))		if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
return V;		return V;

// FAND(x, 0.0) -> 0.0		// FAND(x, 0.0) -> 0.0
if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))		if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
return V;		return V;

		if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget))
		return V;

return lowerX86FPLogicOp(N, DAG, Subtarget);		return lowerX86FPLogicOp(N, DAG, Subtarget);
}		}

/// Do target-specific dag combines on X86ISD::FANDN nodes.		/// Do target-specific dag combines on X86ISD::FANDN nodes.
static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,		static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
// FANDN(0.0, x) -> x		// FANDN(0.0, x) -> x
if (isNullFPScalarOrVectorConst(N->getOperand(0)))		if (isNullFPScalarOrVectorConst(N->getOperand(0)))
▲ Show 20 Lines • Show All 2,164 Lines • Show Last 20 Lines

test/CodeGen/X86/fp-logic-replace.ll

Show All 23 Lines	;
%and = and i64 %bc1, %bc2		%and = and i64 %bc1, %bc2
%bc3 = bitcast i64 %and to double		%bc3 = bitcast i64 %and to double
ret double %bc3		ret double %bc3
}		}

define double @FsANDNPSrr(double %x, double %y) {		define double @FsANDNPSrr(double %x, double %y) {
; SSE-LABEL: FsANDNPSrr:		; SSE-LABEL: FsANDNPSrr:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero		; SSE-NEXT: andnps %xmm0, %xmm1
; SSE-NEXT: xorpd %xmm1, %xmm2		; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: andpd %xmm2, %xmm0
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: FsANDNPSrr:		; AVX-LABEL: FsANDNPSrr:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero		; AVX-NEXT: vandnps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vxorpd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
;		;
%bc1 = bitcast double %x to i64		%bc1 = bitcast double %x to i64
%bc2 = bitcast double %y to i64		%bc2 = bitcast double %y to i64
%not = xor i64 %bc2, -1		%not = xor i64 %bc2, -1
%and = and i64 %bc1, %not		%and = and i64 %bc1, %not
%bc3 = bitcast i64 %and to double		%bc3 = bitcast i64 %and to double
ret double %bc3		ret double %bc3
Show All 38 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[x86] fold fand (fxor X, -1) Y --> fandn X, Y (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 80180

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/fp-logic-replace.ll

[x86] fold fand (fxor X, -1) Y --> fandn X, Y
Closed, Public