Diff 275880

llvm/lib/Target/PowerPC/PPCISelLowering.h

Show First 20 Lines • Show All 96 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
///		///
XXSPLT,		XXSPLT,

/// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for		/// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for
/// converting immediate single precision numbers to double precision		/// converting immediate single precision numbers to double precision
/// vector or scalar.		/// vector or scalar.
XXSPLTI_SP_TO_DP,		XXSPLTI_SP_TO_DP,

		/// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
		anil9Unsubmitted Not Done Reply Inline Actions nit : the other ones seem to have an extra line with /// anil9: nit : the other ones seem to have an extra line with ///
		///
		XXSPLTI32DX,

/// VECINSERT - The PPC vector insert instruction		/// VECINSERT - The PPC vector insert instruction
///		///
VECINSERT,		VECINSERT,

/// VECSHL - The PPC vector shift left instruction		/// VECSHL - The PPC vector shift left instruction
///		///
VECSHL,		VECSHL,

▲ Show 20 Lines • Show All 1,152 Lines • ▼ Show 20 Lines	private:
/// from one vector into the other.		/// from one vector into the other.
SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;		SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;

/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be		/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTB instruction introduced in ISA 3.0. This is		/// handled by the VINSERTB instruction introduced in ISA 3.0. This is
/// essentially v16i8 vector version of VINSERTH.		/// essentially v16i8 vector version of VINSERTH.
SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;		SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;

		/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
		/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
		SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
		anil9Unsubmitted Not Done Reply Inline Actions nit : /// otherwise return the default SDValue. ??? anil9: nit : /// otherwise return the default SDValue. ???
		nemanjaiUnsubmitted Not Done Reply Inline Actions All lowering and combine functions return a default constructed SDValue when unsuccessful. There is no reason to call that out specifically. nemanjai: All lowering and combine functions return a default constructed SDValue when unsuccessful.

// Return whether the call instruction can potentially be optimized to a		// Return whether the call instruction can potentially be optimized to a
// tail call. This will cause the optimizers to attempt to move, or		// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.		// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;		bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool hasBitPreservingFPLogic(EVT VT) const override;		bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;		bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
}; // end class PPCTargetLowering		}; // end class PPCTargetLowering

Show All 16 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,471 Lines • ▼ Show 20 Lines	case PPCISD::FP_TO_SINT_IN_VSR:
return "PPCISD::FP_TO_SINT_IN_VSR";		return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";		case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";		case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";		case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VPERM: return "PPCISD::VPERM";		case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";		case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXSPLTI_SP_TO_DP:		case PPCISD::XXSPLTI_SP_TO_DP:
return "PPCISD::XXSPLTI_SP_TO_DP";		return "PPCISD::XXSPLTI_SP_TO_DP";
		case PPCISD::XXSPLTI32DX:
		return "PPCISD::XXSPLTI32DX";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";		case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";		case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";		case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";		case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";		case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";		case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";		case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";		case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
▲ Show 20 Lines • Show All 8,285 Lines • ▼ Show 20 Lines	if (ShiftElts) {
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}		}
SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);		SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,		SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));		DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}		}

		/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
		/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
		/// return the default SDValue.
		SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
		SelectionDAG &DAG) const {
		// The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
		// to v16i8. Peek through the bitcasts to get the actual operands.
		SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
		SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));

		leiUnsubmitted Done Reply Inline Actions `dl`->`DL` lei: `dl`->`DL`
		auto ShuffleMask = SVN->getMask();
		SDValue VecShuffle(SVN, 0);
		SDLoc DL(SVN);

		// Check that we have a four byte shuffle.
		if (!isNByteElemShuffleMask(SVN, 4, 1))
		nemanjaiUnsubmitted Not Done Reply Inline Actions This comment is incorrect. The canonical type is `v16i8`. nemanjai: This comment is incorrect. The canonical type is `v16i8`.
		return SDValue();

		nemanjaiUnsubmitted Not Done Reply Inline Actions Forgot to remove these? nemanjai: Forgot to remove these?
		// Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
		nemanjaiUnsubmitted Not Done Reply Inline Actions Is there a reason we don't just define these this way above? i.e. `SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));` nemanjai: Is there a reason we don't just define these this way above? i.e. `SDValue LHS =…
		if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
		std::swap(LHS, RHS);
		VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
		ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
		}

		// Ensure that the RHS is a vector of constants.
		BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
		if (!BVN)
		return SDValue();

		// Check if RHS is a splat of 4-bytes (or smaller).
		APInt APSplatValue, APSplatUndef;
		unsigned SplatBitSize;
		bool HasAnyUndefs;
		if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
		nemanjaiUnsubmitted Not Done Reply Inline Actions You do not use `IsBVNConstSplat` anywhere except in the condition. You can just put the call in the condition i.e. `if (!BVN->isConstantSplat(...) || SplatBitSize > 32)` nemanjai: You do not use `IsBVNConstSplat` anywhere except in the condition. You can just put the call in…
		HasAnyUndefs, 0, !Subtarget.isLittleEndian()) \|\|
		SplatBitSize > 32)
		return SDValue();

		// Check that the shuffle mask matches the semantics of XXSPLTI32DX.
		// The instruction splats a constant C into two words of the source vector
		// producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
		nemanjaiUnsubmitted Not Done Reply Inline Actions This can be folded into the above condition. I think it is reasonable to expect the reader to understand that 32 bits is 4 bytes (on PPC) so we don't need to divide by 8. nemanjai: This can be folded into the above condition. I think it is reasonable to expect the reader to…
		// Thus we check that the shuffle mask is the equivalent of
		leiUnsubmitted Done Reply Inline Actions no need for the tmp `SplatSize` if ((SplatBitSize / 8) > 4) lei: no need for the tmp `SplatSize` ``` if ((SplatBitSize / 8) > 4) ```
		// <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
		// Note: the check above of isNByteElemShuffleMask() ensures that the bytes
		anil9Unsubmitted Not Done Reply Inline Actions semantics the -> semantics of the anil9: semantics the -> semantics of the
		nemanjaiUnsubmitted Not Done Reply Inline Actions // Check that the shuffle mask matches the semantics of XXSPLTI32DX. // The instruction splats a constant C into two words of the source vector // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }. // Thus we check that the shuffle mask is the equivalent of // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively. // Note: the check above of isNByteElemShuffleMask() ensures that the bytes // within each word are consecutive, so we only need to check the first byte. nemanjai: ``` // Check that the shuffle mask matches the semantics of XXSPLTI32DX. // The instruction…
		// within each word are consecutive, so we only need to check the first byte.
		SDValue Index;
		bool IsLE = Subtarget.isLittleEndian();
		if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
		(ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
		ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
		Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
		else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
		(ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
		ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
		Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
		else
		leiUnsubmitted Done Reply Inline Actions There seem to be a lot of extra, unnecessary `()` here... since all these are `&&` I think a lot of these can be removed. lei: There seem to be a lot of extra, unnecessary `()` here... since all these are `&&` I think a lot…
		nemanjaiUnsubmitted Not Done Reply Inline Actions We are after type legalization here, can you please use legal types (i.e. no `MVT::i1`). nemanjai: We are after type legalization here, can you please use legal types (i.e. no `MVT::i1`).
		nemanjaiUnsubmitted Not Done Reply Inline Actions This is backwards. On LE, the rightmost element is element zero. In this path, the constant goes into the most significant word of each doubleword. So your `Index` needs to flip in both places. nemanjai: This is backwards. On LE, the rightmost element is element zero. In this path, the constant…
		return SDValue();
		nemanjaiUnsubmitted Not Done Reply Inline Actions If the splat is smaller than 32 bits, you need to replicate it. // If the splat is narrower than 32-bits, we need to get the 32-bit value // for XXSPLTI32DX. unsigned SplatVal = APSplatValue.getZExtValue(); for (; SplatBitSize < 32; SplatBitSize <<= 1) SplatVal |= (SplatVal << SplatBitSize); and then use `SplatVal` below when creating the `XXSPLTI32DX` node. We also need a test case for this. Something like: vector int test(vector int a) { unsigned Val = 0xABABABAB; a[0] = Val; a[2] = Val; return a; } This should give you a `SplatBitSize == 8` and `APSplatValue == 0xAB`. nemanjai: If the splat is smaller than 32 bits, you need to replicate it. ``` // If the splat is narrower…

		leiUnsubmitted Done Reply Inline Actions same. lei: same.
		// If the splat is narrower than 32-bits, we need to get the 32-bit value
		// for XXSPLTI32DX.
		unsigned SplatVal = APSplatValue.getZExtValue();
		leiUnsubmitted Done Reply Inline Actions I think you are missing: else return SDValue(); lei: I think you are missing: ``` else return SDValue(); ```
		for (; SplatBitSize < 32; SplatBitSize <<= 1)
		SplatVal \|= (SplatVal << SplatBitSize);

		SDValue SplatNode = DAG.getNode(
		PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
		Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
		return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
		}

/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).		/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is		/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)		/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
/// i.e (or (shl x, C1), (srl x, 128-C1)).		/// i.e (or (shl x, C1), (srl x, 128-C1)).
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {		SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");		assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
assert(Op.getValueType() == MVT::v1i128 &&		assert(Op.getValueType() == MVT::v1i128 &&
"Only set v1i128 as custom, other type shouldn't reach here!");		"Only set v1i128 as custom, other type shouldn't reach here!");
▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Lines	if (ShiftElts) {
DAG.getConstant(InsertAtByte, dl, MVT::i32));		DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}		}
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,		SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));		DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);		return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}		}

		if (Subtarget.hasPrefixInstrs()) {
		SDValue SplatInsertNode;
		if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
		return SplatInsertNode;
		}

if (Subtarget.hasP9Altivec()) {		if (Subtarget.hasP9Altivec()) {
SDValue NewISDNode;		SDValue NewISDNode;
if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))		if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
return NewISDNode;		return NewISDNode;

if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))		if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
return NewISDNode;		return NewISDNode;
}		}
▲ Show 20 Lines • Show All 7,159 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

		//===----------------------------------------------------------------------===//
		// PowerPC ISA 3.1 specific type constraints.
		//

		def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
		SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
		]>;

		//===----------------------------------------------------------------------===//
		// ISA 3.1 specific PPCISD nodes.
		//

		def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;

		//===----------------------------------------------------------------------===//

// PC Relative flag (for instructions that use the address of the prefix for		// PC Relative flag (for instructions that use the address of the prefix for
// address computations).		// address computations).
class isPCRel { bit PCRel = 1; }		class isPCRel { bit PCRel = 1; }

// Top-level class for prefixed instructions.		// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,		class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin> : Instruction {		InstrItinClass itin> : Instruction {
field bits<64> Inst;		field bits<64> Inst;
Show All 29 Lines	class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
// and one for 32-bit arguments, this bit breaks the degeneracy between		// and one for 32-bit arguments, this bit breaks the degeneracy between
// the two forms and allows TableGen to generate mapping tables.		// the two forms and allows TableGen to generate mapping tables.
bit Interpretation64Bit = 0;		bit Interpretation64Bit = 0;

// Fields used for relation models.		// Fields used for relation models.
string BaseName = "";		string BaseName = "";
}		}

class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,		class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
		leiUnsubmitted Done Reply Inline Actions nit: Maybe PowerPC ISA 3.1 specific type constraints. lei: nit: Maybe ``` PowerPC ISA 3.1 specific type constraints. ```
InstrItinClass itin, list<dag> pattern>		InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {		: PI<1, opcode, OOL, IOL, asmstr, itin> {
bits<5> FRS;		bits<5> FRS;
		leiUnsubmitted Not Done Reply Inline Actions nit: indentation? lei: nit: indentation?
		leiUnsubmitted Not Done Reply Inline Actions nvm. This is how it's been done elsewhere. lei: nvm. This is how it's been done elsewhere.
bits<39> D_RA;		bits<39> D_RA;

		leiUnsubmitted Done Reply Inline Actions nit: // ISA 3.1 specific PPCISD nodes. lei: nit: ``` // ISA 3.1 specific PPCISD nodes. ```
let Pattern = pattern;		let Pattern = pattern;

// The prefix.		// The prefix.
let Inst{6-7} = 2;		let Inst{6-7} = 2;
let Inst{8-10} = 0;		let Inst{8-10} = 0;
let Inst{11} = PCRel;		let Inst{11} = PCRel;
let Inst{12-13} = 0;		let Inst{12-13} = 0;
let Inst{14-31} = D_RA{33-16}; // d0		let Inst{14-31} = D_RA{33-16}; // d0
▲ Show 20 Lines • Show All 667 Lines • ▼ Show 20 Lines	def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
[]>;		[]>;
def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),		def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
(ins i32imm:$IMM32),		(ins i32imm:$IMM32),
"xxspltidp $XT, $IMM32", IIC_VecGeneral,		"xxspltidp $XT, $IMM32", IIC_VecGeneral,
[(set v2f64:$XT,		[(set v2f64:$XT,
(PPCxxspltidp i32:$IMM32))]>;		(PPCxxspltidp i32:$IMM32))]>;
def XXSPLTI32DX :		def XXSPLTI32DX :
8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),		8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
(ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32),		(ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
"xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral, []>,		"xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
		[(set v2i64:$XT,
		nemanjaiUnsubmitted Not Done Reply Inline Actions Can we please use `i32` rather than `i1` as the latter could lead to issues (with using CRBIT registers which we really don't want to do). nemanjai: Can we please use `i32` rather than `i1` as the latter could lead to issues (with using CRBIT…
		(PPCxxsplti32dx v2i64:$XTi, i32:$IX,
		i32:$IMM32))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;		RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
def XXPERMX :		def XXPERMX :
8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,		8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
vsrc:$XC, u3imm:$UIM),		vsrc:$XC, u3imm:$UIM),
"xxpermx $XT, $XA, $XB, $XC, $UIM",		"xxpermx $XT, $XA, $XB, $XC, $UIM",
IIC_VecPerm, []>;		IIC_VecPerm, []>;
def XXBLENDVB :		def XXBLENDVB :
8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,		8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
▲ Show 20 Lines • Show All 222 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
				; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s \| \
				; RUN: FileCheck --check-prefix=CHECK-LE %s
				; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
				; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s \| \
				; RUN: FileCheck --check-prefix=CHECK-BE %s

				; Function Attrs: norecurse nounwind readnone
				define <4 x i32> @test_xxsplti32dx_1(<4 x i32> %a) {
				; CHECK-LE-LABEL: test_xxsplti32dx_1:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 566
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_1:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 566
				; CHECK-BE-NEXT: blr
				entry:
				%vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 566, i32 undef, i32 566>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x i32> %vecins1
				nemanjaiUnsubmitted Not Done Reply Inline Actions The result of this shuffle is: `{ a[0], 566, a[2], 566 }` Which produces a vector register: LE: [ 566 | a[2] | 566 | a[0] ] => xxsplti32dx vs34, 0, 566 BE: [ a[0] | 566 | a[2] | 566 ] => xxsplti32dx vs34, 1, 566 So it is backwards - similarly to all the test cases. nemanjai: The result of this shuffle is: `{ a[0], 566, a[2], 566 }` Which produces a vector register: ```…
				}

				; Function Attrs: norecurse nounwind readnone
				define <4 x i32> @test_xxsplti32dx_2(<4 x i32> %a) {
				; CHECK-LE-LABEL: test_xxsplti32dx_2:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 33
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_2:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 33
				; CHECK-BE-NEXT: blr
				entry:
				%vecins1 = shufflevector <4 x i32> <i32 33, i32 undef, i32 33, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x i32> %vecins1
				}

				; Function Attrs: norecurse nounwind readnone
				define <4 x i32> @test_xxsplti32dx_3(<4 x i32> %a) {
				; CHECK-LE-LABEL: test_xxsplti32dx_3:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 12
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_3:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 12
				; CHECK-BE-NEXT: blr
				entry:
				%vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 12, i32 undef, i32 12>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x i32> %vecins1
				}

				; Function Attrs: norecurse nounwind readnone
				define <4 x i32> @test_xxsplti32dx_4(<4 x i32> %a) {
				; CHECK-LE-LABEL: test_xxsplti32dx_4:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -683
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_4:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -683
				; CHECK-BE-NEXT: blr
				entry:
				%vecins1 = shufflevector <4 x i32> <i32 -683, i32 undef, i32 -683, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x i32> %vecins1
				}

				; Function Attrs: nounwind
				define <4 x float> @test_xxsplti32dx_5(<4 x float> %vfa) {
				; CHECK-LE-LABEL: test_xxsplti32dx_5:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 1065353216
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_5:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 1065353216
				; CHECK-BE-NEXT: blr
				entry:
				%vecins3.i = shufflevector <4 x float> %vfa, <4 x float> <float undef, float 1.000000e+00, float undef, float 1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x float> %vecins3.i
				}

				; Function Attrs: nounwind
				define <4 x float> @test_xxsplti32dx_6(<4 x float> %vfa) {
				; CHECK-LE-LABEL: test_xxsplti32dx_6:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1073741824
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_6:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1073741824
				; CHECK-BE-NEXT: blr
				entry:
				%vecins3.i = shufflevector <4 x float> <float 2.000000e+00, float undef, float 2.000000e+00, float undef>, <4 x float> %vfa, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x float> %vecins3.i
				}

				; Function Attrs: norecurse nounwind readnone
				; Test to illustrate when the splat is narrower than 32-bits.
				define dso_local <4 x i32> @test_xxsplti32dx_7(<4 x i32> %a) local_unnamed_addr #0 {
				; CHECK-LE-LABEL: test_xxsplti32dx_7:
				; CHECK-LE: # %bb.0: # %entry
				; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1414812757
				; CHECK-LE-NEXT: blr
				;
				; CHECK-BE-LABEL: test_xxsplti32dx_7:
				; CHECK-BE: # %bb.0: # %entry
				; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1414812757
				; CHECK-BE-NEXT: blr
				entry:
				%vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
				ret <4 x i32> %vecins1
				}

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC][Power10] Exploit the xxsplti32dx instruction when lowering VECTOR_SHUFFLE.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 275880

llvm/lib/Target/PowerPC/PPCISelLowering.h

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC][Power10] Exploit the xxsplti32dx instruction when lowering VECTOR_SHUFFLE.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 275880

llvm/lib/Target/PowerPC/PPCISelLowering.h

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

[PowerPC][Power10] Exploit the xxsplti32dx instruction when lowering VECTOR_SHUFFLE.
ClosedPublic