Diff 140746

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 30,474 Lines • ▼ Show 20 Lines	static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
// the ADDSUB idiom has been successfully recognized. There are no known		// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!		// X86 targets with 512-bit ADDSUB instructions!
if (VT.is512BitVector())		if (VT.is512BitVector())
return SDValue();		return SDValue();

return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);		return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}		}

		// For SSE4.1 and AVX512, we may want to combine VRNDSCALES from
		//   vector_shuffle<{0,3}|{0,5,6,7}> (fceil|ffloor A), B
		// patterns, i.e. a two-input shuffle whose element 0 is element 0 of the
		// rounded vector and whose remaining elements all come from B.
		//
		// Returns an X86ISD::VRNDSCALES node on a match. Returns an empty SDValue
		// when N is not a shuffle, when the result is not a 128-bit vector of 2 or 4
		// elements, when the subtarget lacks SSE4.1, when the first shuffle operand
		// is not FCEIL/FFLOOR, or when the mask does not have the shape above.
		//
		// This is folded here rather than with isel patterns (as is done for
		// ADDSS): the .td route would need 4 new patterns in 2 separate files
		// (32- and 64-bit patterns for VROUNDS* on AVX and ROUNDS* on SSE4.1).
		// Keeping it here is easier to track and produces less check complexity.
		static SDValue combineShuffleFloorCeil(SDNode *N, SelectionDAG &DAG,
		                                       const X86Subtarget &Subtarget) {
		  auto *SVOp = dyn_cast<ShuffleVectorSDNode>(N);
		  if (!SVOp)
		    return SDValue();

		  EVT VT = N->getValueType(0);
		  unsigned Num = VT.getVectorNumElements();
		  if (Num * VT.getScalarSizeInBits() != 128 || !Subtarget.hasSSE41())
		    return SDValue();

		  SDValue N0 = N->getOperand(0);
		  SDValue N1 = N->getOperand(1);
		  unsigned Opc = N0.getOpcode();
		  if ((Num != 2 && Num != 4) || (Opc != ISD::FCEIL && Opc != ISD::FFLOOR))
		    return SDValue();

		  // The mask being matched here is equivalent to a 0...01 select mask:
		  // lane 0 is taken from N0, every other lane i from lane i of N1.
		  if (SVOp->getMaskElt(0) != 0)
		    return SDValue();
		  for (unsigned i = 1; i < Num; ++i) {
		    // getMaskElt() returns a signed int; Num + i is at most 7 here, so the
		    // conversion is lossless and avoids a signed/unsigned comparison.
		    if (SVOp->getMaskElt(i) != static_cast<int>(Num + i))
		      return SDValue();
		  }

		  // Immediate operand for the round node: 2 for FCEIL, 1 for FFLOOR.
		  // NOTE(review): the packed ffloor/fceil patterns in X86InstrAVX512.td use
		  // 0x9/0xA; confirm that 1/2 is what is intended for the scalar form.
		  int Imm = (Opc == ISD::FCEIL) ? 2 : 1;
		  SDLoc DL(N);
		  return DAG.getNode(X86ISD::VRNDSCALES, DL, VT, N1, N0.getOperand(0),
		                     DAG.getConstant(Imm, DL, MVT::i32));
		}

// We are looking for a shuffle where both sources are concatenated with undef		// We are looking for a shuffle where both sources are concatenated with undef
// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so		// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
// if we can express this as a single-source shuffle, that's preferable.		// if we can express this as a single-source shuffle, that's preferable.
static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,		static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {		const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX2() \|\| !isa<ShuffleVectorSDNode>(N))		if (!Subtarget.hasAVX2() \|\| !isa<ShuffleVectorSDNode>(N))
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 157 Lines • ▼ Show 20 Lines	for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
break;		break;
}		}

if (Elts.size() == VT.getVectorNumElements())		if (Elts.size() == VT.getVectorNumElements())
if (SDValue LD =		if (SDValue LD =
EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))		EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
return LD;		return LD;

		if (SDValue RndScale = combineShuffleFloorCeil(N, DAG, Subtarget))
		return RndScale;

// For AVX2, we sometimes want to combine		// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)		// (vector_shuffle <mask> (concat_vectors t1, undef)
// (concat_vectors t2, undef))		// (concat_vectors t2, undef))
// Into:		// Into:
// (vector_shuffle <mask> (concat_vectors t1, t2), undef)		// (vector_shuffle <mask> (concat_vectors t1, t2), undef)
// Since the latter can be efficiently lowered with VPERMD/VPERMQ		// Since the latter can be efficiently lowered with VPERMD/VPERMQ
if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))		if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
return ShufConcat;		return ShufConcat;
▲ Show 20 Lines • Show All 9,065 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86InstrAVX512.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,212 Lines • ▼ Show 20 Lines

defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,		defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,		f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<64, CD8VT1>;		EVEX_CD8<64, CD8VT1>;

//-------------------------------------------------		//-------------------------------------------------
// Integer truncate and extend operations		// Integer truncate and extend operations
//-------------------------------------------------		//-------------------------------------------------

		craig.topper (inline comment, Done): Why HasVLX? Shouldn't scalar instructions be valid under HasAVX512?
let Sched = WriteShuffle256 in		let Sched = WriteShuffle256 in
		craig.topper (inline comment, Done): Do we have test cases covering this pattern? I can't find any zero extend instructions.
def AVX512_EXTEND : OpndItins<		def AVX512_EXTEND : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI		IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;		>;

let Sched = WriteShuffle256 in		let Sched = WriteShuffle256 in
def AVX512_TRUNCATE : OpndItins<		def AVX512_TRUNCATE : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI		IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;		>;
▲ Show 20 Lines • Show All 1,134 Lines • ▼ Show 20 Lines	defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;		AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,		defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,		0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;		AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

let Predicates = [HasAVX512] in {		let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),		def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;		(VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
		def : Pat<(v16f32 (vselect v16f32_info.KRCWM:$mask, (ffloor VR512:$src), VR512:$dst)),
		(VRNDSCALEPSZrrik VR512:$dst, v16f32_info.KRCWM:$mask, VR512:$src, (i32 0x9))>;
		craig.topper (inline comment, Done): What about zero masking?
def : Pat<(v16f32 (fnearbyint VR512:$src)),		def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;		(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),		def : Pat<(v16f32 (fceil VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;		(VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
		def : Pat<(v16f32 (vselect v16f32_info.KRCWM:$mask, (fceil VR512:$src), VR512:$dst)),
		(VRNDSCALEPSZrrik VR512:$dst, v16f32_info.KRCWM:$mask, VR512:$src, (i32 0xA))>;
def : Pat<(v16f32 (frint VR512:$src)),		def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;		(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),		def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;		(VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;

def : Pat<(v8f64 (ffloor VR512:$src)),		def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;		(VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
		def : Pat<(v8f64 (vselect v8f64_info.KRCWM:$mask, (ffloor VR512:$src), VR512:$dst)),
		(VRNDSCALEPDZrrik VR512:$dst, v8f64_info.KRCWM:$mask, VR512:$src, (i32 0x9))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),		def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;		(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),		def : Pat<(v8f64 (fceil VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;		(VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
		def : Pat<(v8f64 (vselect v8f64_info.KRCWM:$mask, (fceil VR512:$src), VR512:$dst)),
		(VRNDSCALEPDZrrik VR512:$dst, v8f64_info.KRCWM:$mask, VR512:$src, (i32 0xA))>;
def : Pat<(v8f64 (frint VR512:$src)),		def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;		(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),		def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;		(VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
}		}

let Predicates = [HasVLX] in {		let Predicates = [HasVLX] in {
def : Pat<(v4f32 (ffloor VR128X:$src)),		def : Pat<(v4f32 (ffloor VR128X:$src)),
▲ Show 20 Lines • Show All 1,534 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86] VRNDSCALE* folding from masked and scalar ffloor and fceil patterns
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 140746

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86InstrAVX512.td

This is an archive of the discontinued LLVM Phabricator instance.

[X86] VRNDSCALE* folding from masked and scalar ffloor and fceil patternsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 140746

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86InstrAVX512.td

[X86] VRNDSCALE* folding from masked and scalar ffloor and fceil patterns
ClosedPublic