This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/trunk/
-
trunk/
-
lib/Target/X86/
-
Target/
-
X86/
-
X86ISelLowering.cpp
-
test/CodeGen/X86/
-
CodeGen/
-
X86/
-
combine-or.ll

Differential D3964

Separate the check for blend shuffle_vector masks
ClosedPublic

Authored by filcab on May 29 2014, 9:13 PM.

Download Raw Diff

Details

Reviewers

nadav
delena
andreadb

Commits

rGd3aebaf875a3: Separate the check for blend shuffle_vector masks
rL209923: Separate the check for blend shuffle_vector masks

Summary

Separate the check for blend shuffle_vector masks into isBlendMask.
This function will also be used to check if a vector shuffle is legal. No
change in functionality was intended, but we ended up improving codegen on
two tests, which were being (more) optimized only if the resulting shuffle
was legal.

Diff Detail

Repository: rL LLVM

Event Timeline

filcab updated this revision to Diff 9943.May 29 2014, 9:13 PM

filcab retitled this revision from to Separate the check for blend shuffle_vector masks.

filcab updated this object.

filcab edited the test plan for this revision. (Show Details)

filcab added reviewers: nadav, delena, andreadb.

filcab added a subscriber: Unknown Object (MLST).

Hi Filipe,

By adding the call to 'isBlendMask' in 'X86TargetLowering::isShuffleMaskLegal' you actually made a functional change.
With your change, the DAGCombiner is now able to fold more vector OR DAG nodes according to the rules:

fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2).

I think that the behavior change in the DAGCombiner is good (as you said, we optimize more cases; that explains why you had to "fix" test 'combine-or.ll').

I can see other uses of method 'isShuffleMaskLegal' in LegalizeDAG.cpp and LegalizeVectorOps.cpp.
For example, in LegalizeDAG.cpp, method SelectionDAGLegalize::ExpandBUILD_VECTOR calls 'isShuffleMaskLegal' to see if it is possible to expand build_vector dag nodes into legal shuffles.

The other place where we call 'isShuffleMaskLegal' is LegalizeVectorOps.cpp.
More specifically, method ExpandBSWAP uses it to check if we can generate a legal byte-wise shuffle to implement a BSWAP.

That said, I cannot see how your change might have negatively affected those methods.

For what it's worth, your change looks good to me (it definitely makes sense for the DAGCombiner).

andreadb accepted this revision.May 30 2014, 10:11 AM

andreadb edited edge metadata.

This revision is now accepted and ready to land.May 30 2014, 10:11 AM

Closed by commit rL209923 (authored by @filcab).

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86ISelLowering.cpp

67 lines

test/

CodeGen/

X86/

combine-or.ll

4 lines

Diff 9965

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,423 Lines • ▼ Show 20 Lines	static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {

// AVX can use the vinsertf128 instruction to create 256-bit vectors		// AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.		// from two other 128-bit ones.

// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors		// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
return LowerAVXCONCAT_VECTORS(Op, DAG);		return LowerAVXCONCAT_VECTORS(Op, DAG);
}		}

// Try to lower a shuffle node into a simple blend instruction.		static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41,
static SDValue		bool hasInt256, unsigned *MaskOut = nullptr) {
LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
MVT VT = SVOp->getSimpleValueType(0);
MVT EltVT = VT.getVectorElementType();		MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();

// There is no blend with immediate in AVX-512.		// There is no blend with immediate in AVX-512.
if (VT.is512BitVector())		if (VT.is512BitVector())
return SDValue();		return false;

if (!Subtarget->hasSSE41() \|\| EltVT == MVT::i8)		if (!hasSSE41 \|\| EltVT == MVT::i8)
return SDValue();		return false;
if (!Subtarget->hasInt256() && VT == MVT::v16i16)		if (!hasInt256 && VT == MVT::v16i16)
return SDValue();		return false;

// Check the mask for BLEND and build the value.
unsigned MaskValue = 0;		unsigned MaskValue = 0;
		unsigned NumElems = VT.getVectorNumElements();
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.		// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
unsigned NumLanes = (NumElems-1)/8 + 1;		unsigned NumLanes = (NumElems - 1) / 8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;		unsigned NumElemsInLane = NumElems / NumLanes;

// Blend for v16i16 should be symetric for the both lanes.		// Blend for v16i16 should be symetric for the both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {		for (unsigned i = 0; i < NumElemsInLane; ++i) {

int SndLaneEltIdx = (NumLanes == 2) ?		int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
SVOp->getMaskElt(i + NumElemsInLane) : -1;		int EltIdx = MaskVals[i];
int EltIdx = SVOp->getMaskElt(i);

if ((EltIdx < 0 \|\| EltIdx == (int)i) &&		if ((EltIdx < 0 \|\| EltIdx == (int)i) &&
(SndLaneEltIdx < 0 \|\| SndLaneEltIdx == (int)(i + NumElemsInLane)))		(SndLaneEltIdx < 0 \|\| SndLaneEltIdx == (int)(i + NumElemsInLane)))
continue;		continue;

if (((unsigned)EltIdx == (i + NumElems)) &&		if (((unsigned)EltIdx == (i + NumElems)) &&
(SndLaneEltIdx < 0 \|\|		(SndLaneEltIdx < 0 \|\|
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))		(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
MaskValue \|= (1<<i);		MaskValue \|= (1 << i);
else		else
return SDValue();		return false;
}		}

		if (MaskOut)
		*MaskOut = MaskValue;
		return true;
		}

		// Try to lower a shuffle node into a simple blend instruction.
		// This function assumes isBlendMask returns true for this
		// SuffleVectorSDNode
		static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
		unsigned MaskValue,
		const X86Subtarget *Subtarget,
		SelectionDAG &DAG) {
		MVT VT = SVOp->getSimpleValueType(0);
		MVT EltVT = VT.getVectorElementType();
		assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
		Subtarget->hasInt256() && "Trying to lower a "
		"VECTOR_SHUFFLE to a Blend but "
		"with the wrong mask"));
		SDValue V1 = SVOp->getOperand(0);
		SDValue V2 = SVOp->getOperand(1);
		SDLoc dl(SVOp);
		unsigned NumElems = VT.getVectorNumElements();

// Convert i32 vectors to floating point if it is not AVX2.		// Convert i32 vectors to floating point if it is not AVX2.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.		// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
MVT BlendVT = VT;		MVT BlendVT = VT;
if (EltVT == MVT::i64 \|\| (EltVT == MVT::i32 && !Subtarget->hasInt256())) {		if (EltVT == MVT::i64 \|\| (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),		BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
NumElems);		NumElems);
V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);		V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);		V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
▲ Show 20 Lines • Show All 1,420 Lines • ▼ Show 20 Lines	if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx))
return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),		return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),
Idx*(NumElems/2), DAG, dl);		Idx*(NumElems/2), DAG, dl);

// Handle VPERM2F128/VPERM2I128 permutations		// Handle VPERM2F128/VPERM2I128 permutations
if (isVPERM2X128Mask(M, VT, HasFp256))		if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,		return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);		V2, getShuffleVPERM2X128Immediate(SVOp), DAG);

SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);		unsigned MaskValue;
if (BlendOp.getNode())		if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
return BlendOp;		&MaskValue))
		return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);

if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))		if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
return getINSERTPS(SVOp, dl, DAG);		return getINSERTPS(SVOp, dl, DAG);

unsigned Imm8;		unsigned Imm8;
if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))		if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);		return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);

▲ Show 20 Lines • Show All 7,244 Lines • ▼ Show 20 Lines	return (SVT.getVectorNumElements() == 2 \|\|
isSHUFPMask(M, SVT) \|\|		isSHUFPMask(M, SVT) \|\|
isPSHUFDMask(M, SVT) \|\|		isPSHUFDMask(M, SVT) \|\|
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) \|\|		isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) \|\|
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) \|\|		isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) \|\|
isPALIGNRMask(M, SVT, Subtarget) \|\|		isPALIGNRMask(M, SVT, Subtarget) \|\|
isUNPCKLMask(M, SVT, Subtarget->hasInt256()) \|\|		isUNPCKLMask(M, SVT, Subtarget->hasInt256()) \|\|
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) \|\|		isUNPCKHMask(M, SVT, Subtarget->hasInt256()) \|\|
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) \|\|		isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) \|\|
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));		isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) \|\|
		isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
}		}

bool		bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,		X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {		EVT VT) const {
if (!VT.isSimple())		if (!VT.isSimple())
return false;		return false;

▲ Show 20 Lines • Show All 6,243 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/combine-or.ll

	Show All 19 Lines
	define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {			define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
	%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>			%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
	%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>			%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
	%or = or <4 x i32> %shuf1, %shuf2			%or = or <4 x i32> %shuf1, %shuf2
	ret <4 x i32> %or			ret <4 x i32> %or
	}			}
	; CHECK-LABEL: test2			; CHECK-LABEL: test2
	; CHECK-NOT: xorps			; CHECK-NOT: xorps
	; CHECK: shufps			; CHECK: movsd
	; CHECK: ret			; CHECK: ret


	define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {			define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
	%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>			%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
	%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>			%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
	%or = or <2 x i64> %shuf1, %shuf2			%or = or <2 x i64> %shuf1, %shuf2
	ret <2 x i64> %or			ret <2 x i64> %or
	▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines
	define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {			define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
	%and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>			%and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
	%and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>			%and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
	%or = or <4 x i32> %and1, %and2			%or = or <4 x i32> %and1, %and2
	ret <4 x i32> %or			ret <4 x i32> %or
	}			}
	; CHECK-LABEL: test9			; CHECK-LABEL: test9
	; CHECK-NOT: xorps			; CHECK-NOT: xorps
	; CHECK: shufps			; CHECK: movsd
	; CHECK: ret			; CHECK: ret


	define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {			define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
	%and1 = and <2 x i64> %a, <i64 0, i64 -1>			%and1 = and <2 x i64> %a, <i64 0, i64 -1>
	%and2 = and <2 x i64> %b, <i64 -1, i64 0>			%and2 = and <2 x i64> %b, <i64 -1, i64 0>
	%or = or <2 x i64> %and1, %and2			%or = or <2 x i64> %and1, %and2
	ret <2 x i64> %or			ret <2 x i64> %or
	▲ Show 20 Lines • Show All 147 Lines • Show Last 20 Lines