Diff 79761

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//		//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//		//
// The LLVM Compiler Infrastructure		// The LLVM Compiler Infrastructure
//		//
// This file is distributed under the University of Illinois Open Source		// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.		// License. See LICENSE.TXT for details.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file defines the interfaces that X86 uses to lower LLVM code into a		// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.		// selection DAG.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"		#include "X86ISelLowering.h"
		igorb Unsubmitted Not Done Reply Inline Actions Please add comments. Make this transformation only for AVX512 (you benefit only if an instruction with zero-masking exists). igorb: Please add comments. Make this transformation only for AVX512 (you benefit only if…
#include "Utils/X86ShuffleDecode.h"		#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"		#include "X86CallingConv.h"
#include "X86FrameLowering.h"		#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"		#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"		#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"		#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"		#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"		#include "X86TargetObjectFile.h"
▲ Show 20 Lines • Show All 27,526 Lines • ▼ Show 20 Lines	static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}		}

// The replacement was made in place; don't return anything.		// The replacement was made in place; don't return anything.
return SDValue();		return SDValue();
}		}

/// If a vector select has an operand that is -1 or 0, simplify the select to a		/// If a vector select has an operand that is -1 or 0, simplify the select to a
/// bitwise logic operation.		/// bitwise logic operation.
static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG) {		static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
		const X86Subtarget &Subtarget) {
SDValue Cond = N->getOperand(0);		SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);		SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);		SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();		EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();		EVT CondVT = Cond.getValueType();
SDLoc DL(N);		SDLoc DL(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();

if (N->getOpcode() != ISD::VSELECT)		if (N->getOpcode() != ISD::VSELECT)
return SDValue();		return SDValue();

		bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
		//Check if the first operand is all zeros.This situation only
		igorb Unsubmitted Not Done Reply Inline Actions Please add a space after //; the same applies below. igorb: Please add a space after //; the same applies below.
		//applies to avx512.
		if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse()) {
		delenaUnsubmitted Not Done Reply Inline Actions I'd put Cond.hasOneUse() here, not in the common code. I assume that we'll need all patterns back in this case. delena: I'd put Cond.hasOneUse() here, not in the common code. I assume that we'll need all patterns…
		assert((N->getOpcode() == ISD::VSELECT) && "expects a vector selector!");
		delenaUnsubmitted Not Done Reply Inline Actions You check N->getOpcode() == ISD::VSELECT 2 lines above. delena: You check N->getOpcode() == ISD::VSELECT 2 lines above.
		m_zuckerman Author Unsubmitted Not Done Reply Inline Actions Yes, I know, but if someone deletes that line, I still have the assert to check it. m_zuckerman: Yes, I know, but if someone deletes that line, I still have the assert to check it.
		delenaUnsubmitted Not Done Reply Inline Actions I don't think that we need duplication here. delena: I don't think that we need duplication here.
		//Invert the cond to not(cond) : xor(op,allones)=not(op)
		SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
		DAG.getConstant(1, DL, Cond.getValueType()));
		//Vselect cond, op1, op2 = Vselect not(cond), op2, op1
		return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
		}
assert(CondVT.isVector() && "Vector select expects a vector selector!");		assert(CondVT.isVector() && "Vector select expects a vector selector!");

// To use the condition operand as a bitwise mask, it must have elements that		// To use the condition operand as a bitwise mask, it must have elements that
// are the same size as the select elements. Ie, the condition operand must		// are the same size as the select elements. Ie, the condition operand must
// have already been promoted from the IR select condition type <N x i1>.		// have already been promoted from the IR select condition type <N x i1>.
// Don't check if the types themselves are equal because that excludes		// Don't check if the types themselves are equal because that excludes
// vector floating-point selects.		// vector floating-point selects.
if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())		if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();		return SDValue();

bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());		bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());		FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

// Try to invert the condition if true value is not all 1s and false value is		// Try to invert the condition if true value is not all 1s and false value is
// not all 0s.		// not all 0s.
if (!TValIsAllOnes && !FValIsAllZeros &&		if (!TValIsAllOnes && !FValIsAllZeros &&
// Check if the selector will be produced by CMPP/PCMP.		// Check if the selector will be produced by CMPP/PCMP.
Cond.getOpcode() == ISD::SETCC &&		Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted.		// Check if SETCC has already been promoted.
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==		TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
▲ Show 20 Lines • Show All 449 Lines • ▼ Show 20 Lines	if (Other.getNode() && Other->getNumOperands() == 2 &&
// don't rely on particular values of undef lanes.		// don't rely on particular values of undef lanes.
return DAG.getNode(		return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,		X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT));		DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT));
}		}
}		}
}		}

if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG))		if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
return V;		return V;

// If this is a dynamic select (non-constant condition) and we can match		// If this is a dynamic select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the		// this node with one of the variable blend instructions, restructure the
// condition so that the blends can use the high bit of each element and use		// condition so that the blends can use the high bit of each element and use
// SimplifyDemandedBits to simplify the condition operand.		// SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&		if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&		!DCI.isBeforeLegalize() &&
▲ Show 20 Lines • Show All 5,395 Lines • Show Last 20 Lines

test/CodeGen/X86/avx512-vec-cmp.ll

Show First 20 Lines • Show All 652 Lines • ▼ Show 20 Lines	; CHECK-NEXT: retq
%cmpvector_i = fcmp oeq <16 x float> %a, %b		%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>		%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
ret <16 x i32> %conv		ret <16 x i32> %conv
}		}

define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {		define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:		; CHECK-LABEL: test14:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1		; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k0		; CHECK-NEXT: vpcmpgtd %zmm0, %zmm2, %k1
; CHECK-NEXT: knotw %k0, %k1		; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%sub_r = sub <16 x i32> %a, %b		%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a		%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>		%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
%mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer		%mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
%res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r		%res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
ret <16 x i32>%res		ret <16 x i32>%res
}		}

define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {		define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:		; CHECK-LABEL: test15:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1		; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2
; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k0		; CHECK-NEXT: vpcmpgtq %zmm0, %zmm2, %k1
; CHECK-NEXT: knotw %k0, %k1		; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%sub_r = sub <8 x i64> %a, %b		%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a		%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>		%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
%mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer		%mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
%res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r		%res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
ret <8 x i64>%res		ret <8 x i64>%res
}		}
▲ Show 20 Lines • Show All 561 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Fix bug 30945- [AVX512] Failure to flip vector comparison to remove not mask instruction
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 79761

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/avx512-vec-cmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

Fix bug 30945 - [AVX512] Failure to flip vector comparison to remove not mask instruction
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 79761

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/avx512-vec-cmp.ll

Fix bug 30945- [AVX512] Failure to flip vector comparison to remove not mask instruction
ClosedPublic