This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add intrinsics for non-temporal gather-loads/scatter-stores
ClosedPublic

Authored by andwar on Feb 19 2020, 11:45 AM.

Details

Summary

[AArch64][SVE] Add intrinsics for non-temporal gather-loads/scatter-stores

This patch adds the following LLVM IR intrinsics:

  1. SVE non-temporal gather loads
    • @llvm.aarch64.sve.ldnt1.gather
    • @llvm.aarch64.sve.ldnt1.gather.uxtw
    • @llvm.aarch64.sve.ldnt1.gather.scalar.offset
  2. SVE non-temporal scatter stores
    • @llvm.aarch64.sve.stnt1.scatter
    • @llvm.aarch64.sve.stnt1.scatter.uxtw
    • @llvm.aarch64.sve.stnt1.scatter.scalar.offset

These intrinsics are mapped to the corresponding SVE instructions
(example for half-words, zero-extending gather loads; an IR-level
sketch follows the list):

  • ldnt1h { z0.s }, p0/z, [z0.s, x0]
  • stnt1h { z0.s }, p0, [z0.s, x0]
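
As an illustration, calls to the "vector base, scalar offset" intrinsics might look as follows. This is only a sketch: the operand order and name mangling are assumed to mirror the regular (temporal) gather/scatter intrinsics, and the function names are made up for the example.

    ; Gather of half-words, zero-extended to 32 bits; expected to lower to
    ;   ldnt1h { z0.s }, p0/z, [z0.s, x0]
    define <vscale x 4 x i32> @gldnt1h_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
      %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
      %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
      ret <vscale x 4 x i32> %res
    }

    ; Scatter of 32-bit elements truncated to half-words; expected to lower to
    ;   stnt1h { z0.s }, p0, [z1.s, x0]
    define void @sstnt1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
      %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
      call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16> %trunc, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
      ret void
    }

    declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
    declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)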

Note that for non-temporal gathers/scatters, the SVE spec defines only
one instruction type: "vector + scalar". For this reason, we swap the
arguments when processing the following intrinsics (which implement the
"scalar + vector" addressing mode):

  • @llvm.aarch64.sve.ldnt1.gather
  • @llvm.aarch64.sve.ldnt1.gather.uxtw
  • @llvm.aarch64.sve.stnt1.scatter
  • @llvm.aarch64.sve.stnt1.scatter.uxtw

In other words, all gather-load intrinsics implemented in this patch
map to the same load instruction, and all scatter-store intrinsics map
to the same store instruction.
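
For example (again only a sketch, assuming the same mangling scheme as the regular gather intrinsics), a "scalar base, vector offsets" gather carries a pointer-typed base and a vector of 64-bit offsets; during lowering the two operands are swapped so that the vector operand plays the role of the base in the single "vector + scalar" instruction:

    define <vscale x 2 x i64> @gldnt1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets) {
      ; Operands are swapped during lowering, yielding
      ;   ldnt1d { z0.d }, p0/z, [z0.d, x0]
      %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets)
      ret <vscale x 2 x i64> %load
    }

    declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)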

The sve2_mem_gldnt_vs multiclass (and its counterpart for scatter
stores) from SVEInstrFormats.td was split into:

  • sve2_mem_gldnt_vec_vs_32_ptrs (32-bit wide base addresses)
  • sve2_mem_gldnt_vec_vs_64_ptrs (64-bit wide base addresses)

This is consistent with what we did for
@llvm.aarch64.sve.ld1.gather.scalar.offset and highlights the actual
split in the spec and the implementation.

Event Timeline

andwar created this revision.Feb 19 2020, 11:45 AM
Herald added a project: Restricted Project.

> Note that for the non-temporal gather-loads and scatter-stores "vector base, scalar offset" is the only available addressing mode.

Not sure what you mean by this. The ACLE provides both "vector base, scalar offset" and "scalar base, vector offset"; we should provide corresponding intrinsics, I think.

Hi @efriedma , thank you for taking a look!

>> Note that for the non-temporal gather-loads and scatter-stores "vector base, scalar offset" is the only available addressing mode.

> Not sure what you mean by this. The ACLE provides both "vector base, scalar offset" and "scalar base, vector offset"; we should provide corresponding intrinsics, I think.

I'm not being very clear there, sorry. I'm only referring to the LLVM IR intrinsics, which we want to map 1:1 to the actual instructions (and, AFAIK, "vector base, scalar offset" is the only option there).

ACLE indeed defines more addressing modes for non-temporal gather loads and that will require some extra logic. Clang seems like a more natural place to handle that.

andwar updated this revision to Diff 245633.Feb 20 2020, 6:06 AM

I've made some NFC changes, so that we re-use more code.

  • Rebased this patch to incorporate the changes implemented here: https://reviews.llvm.org/rG0e417b034ad2
  • Renamed Selection DAG types for gathers and scatters and used those definitions for non-temporal gathers/scatters (as opposed to defining new ones)

The instruction in the SVE spec is just "scalar + vector". Whether the vector is the base or the offset is a matter of interpretation. (Really, the difference is that we want to write the base with a pointer type, so we aren't forced to insert ptrtoint operations into the IR.)
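
(As a hypothetical illustration of that last point: if the "scalar + vector" intrinsics took an integer base rather than a pointer, every front-end emitting them would need an extra cast at each call site, e.g.

    %base.int = ptrtoint i64* %p to i64

whereas a pointer-typed base operand keeps the IR free of such casts.)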

andwar edited the summary of this revision. (Show Details)Feb 21 2020, 2:48 AM

That's a good point, thank you! I've updated the summary to reflect that.

andwar marked an inline comment as done.Feb 21 2020, 5:25 AM
andwar added inline comments.
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
28

This should be SDT instead of STD. I will fix this.

andwar updated this revision to Diff 245841.Feb 21 2020, 6:40 AM

Fixing typo: STD --> SDT.

This patch is doing a lot of refactoring/renaming, can you separate that out into a separate NFC patch?
On the point of having separate intrinsics for (vector, scalar) and (scalar, vector), I think this makes sense, as we'll then use similar intrinsics for ldnt1 and ld1, and we can target the right instruction in a similar way to how we do this in performLD1GatherCombine.

llvm/include/llvm/IR/IntrinsicsAArch64.td
1266

Can you derive from AdvSIMD_GatherLoad_VectorBase_Intrinsic instead?
(and something similar for the scatter store)

This also makes it more clear that these have the exact same form as the normal gathers.

llvm/lib/Target/AArch64/SVEInstrFormats.td
4947–4950

nit: vec_32b_ptrs ?

4950

If you change the name of this class, you may want to update the parent class as well.

andwar edited the summary of this revision. (Show Details)Feb 25 2020, 7:42 AM
andwar marked 3 inline comments as done.

> This patch is doing a lot of refactoring/renaming, can you separate that out into a separate NFC patch?

Extracted and submitted here: https://reviews.llvm.org/rGcff90c938b7be43de482ffb7a8a7fdbdf57c32a3

> On the point of having separate intrinsics for (vector, scalar) and (scalar, vector), I think this makes sense, as we'll then use similar intrinsics for ldnt1 and ld1, and we can target the right instruction in a similar way to how we do this in performLD1GatherCombine.

I've updated this patch accordingly. I've kept the names of the new intrinsics consistent with the regular gathers/scatters. The intrinsics added here should be sufficient to support what's defined in ACLE, but there are still fewer LLVM IR intrinsics for non-temporal scatters/gathers than there are for regular gathers/scatters. Note that for the (scalar, vector) case I added 2 intrinsics (sketched below):

  • @llvm.aarch64.sve.ldnt1.gather - 64-bit wide offsets
  • @llvm.aarch64.sve.ldnt1.gather.uxtw - 32-bit wide offsets

Doing it this way makes it more natural to re-use the existing code for regular gathers/scatters and keeps things consistent. I hope that this makes sense.
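
For instance, a use of the uxtw variant might look like this (a sketch mirroring the regular gather intrinsics; the 32-bit offsets are zero-extended to 64 bits before being added to the base):

    define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
      ; Expected to lower to: ldnt1w { z0.s }, p0/z, [z0.s, x0]
      %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets)
      ret <vscale x 4 x i32> %load
    }

    declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)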

llvm/lib/Target/AArch64/SVEInstrFormats.td
4947–4950
4950

What about setting the name of this multiclass to sve2_mem_gldnt_vec_vs_32_ptrs instead? This way updating the name of the base class is no longer needed.

andwar updated this revision to Diff 246463.Feb 25 2020, 7:49 AM
andwar marked an inline comment as done.

I've already updated the summary and replied to comments inline. Here's a summary of the latest changes:

  • Extracted the NFC changes (this required rebasing)
  • Added 2 more intrinsics for the (scalar, vector) case
  • Addressed comments from @sdesmalen
sdesmalen added inline comments.Feb 25 2020, 9:14 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
12556

Have you experimented with moving this code out of this function (same for the SST1_IMM case below), and passing in the Base and Offset (and possibly Chain/PG) as operands to performScatterStoreCombine? In the case-statement for aarch64_sve_st1_scatter_scalar_offset and aarch64_sve_stnt1_scatter_scalar_offset you can then do the swap. That seems a bit better than special-casing these in the combine itself.

andwar marked an inline comment as done.Feb 26 2020, 5:41 AM
andwar added inline comments.
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
12556

Yes, but currently:

  • it is very explicit that AArch64ISD::SSTNT1 requires some special treatment, which IMO is a bit counterintuitive (hopefully the comments make it clear!)
  • I'm only passing one argument (*N), instead of 3 (*N, SDValue, SDValue) to performScatterStoreCombine (not counting the other arguments), so the call-site is cleaner if we keep things as they are.

Also, SST1_IMM requires 2 conditions to be checked and the opcode to be updated (and there are 2 possibilities here, either SST1_UXTW or SST1). Swapping Base and Offset when calling performScatterStoreCombine wouldn't be enough to replicate this.

Having said that, I've been looking at performScatterStoreCombine/performGatherLoadCombine for a while now and I wouldn't be surprised if I'm over-engineering this :)

sdesmalen accepted this revision.Feb 27 2020, 3:07 AM

LGTM!

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
12556

Fair enough. If this function ever needs to be extended for more intrinsics, we may want to reconsider generalising this, but this is fine for now then.

This revision is now accepted and ready to land.Feb 27 2020, 3:07 AM
This revision was automatically updated to reflect the committed changes.