This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add intrinsics for non-temporal scatters/gathers
Closed, Public

Authored by andwar on Mar 4 2020, 3:43 AM.

Details

Summary

[AArch64][SVE] Add intrinsics for non-temporal scatters/gathers

This patch adds the following intrinsics for non-temporal gather loads
and scatter stores:

  • aarch64_sve_ldnt1_gather_index
  • aarch64_sve_stnt1_scatter_index

These intrinsics implement the "scalar + vector of indices" addressing
mode.
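
For illustration, a minimal IR sketch of the 64-bit gather form, modelled on the tests added by this patch (the exact nxv2i64 declaration shown here is an assumption):

  declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)

  ; Non-temporal gather: loads an i64 element from %base + %indices[i] * 8
  ; for each active lane of %pg.
  define <vscale x 2 x i64> @gldnt1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %indices) {
    %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.index.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %indices)
    ret <vscale x 2 x i64> %load
  }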

Unlike regular and first-faulting gathers/scatters, there is no
non-temporal instruction that takes indices and scales them itself.
Instead, the indices for non-temporal gathers/scatters are scaled before
the intrinsics are lowered to ldnt1/stnt1 instructions.

The new ISD nodes, GLDNT1_INDEX and SSTNT1_INDEX, are only used as
placeholders so that we can easily identify the cases implemented in
this patch in performGatherLoadCombine and performScatterStoreCombine.
Once encountered, they are replaced with:

  • GLDNT1_INDEX -> SPLAT_VECTOR + SHL + GLDNT1
  • SSTNT1_INDEX -> SPLAT_VECTOR + SHL + SSTNT1
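
To make the replacement concrete, here is a rough IR-level equivalent of the gather expansion (the actual rewrite happens on ISD nodes inside the DAG combines; the unscaled-offset intrinsic declaration below is an assumption based on the sibling non-temporal gathers):

  declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)

  define <vscale x 2 x i64> @gldnt1d_index_expanded(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %indices) {
    ; SPLAT_VECTOR: splat the shift amount, log2(8 bytes) = 3 for i64 elements.
    %ins = insertelement <vscale x 2 x i64> undef, i64 3, i32 0
    %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
    ; SHL: pre-scale the indices into byte offsets.
    %offsets = shl <vscale x 2 x i64> %indices, %splat
    ; GLDNT1: the unscaled-offset non-temporal gather takes over from here.
    %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.gather.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets)
    ret <vscale x 2 x i64> %load
  }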

The patterns for lowering ISD::SHL for scalable vectors (required by
this patch) were missing, so these are added too.
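
A sketch of the kind of scalable-vector shift those new patterns have to select; the expectation (an assumption here, mirroring the patch's tests) is a single shift such as lsl z0.d, z0.d, #3 rather than scalarisation:

  define <vscale x 2 x i64> @shl_by_3(<vscale x 2 x i64> %a) {
    ; Splat of the shift amount, as produced by the combines above.
    %ins = insertelement <vscale x 2 x i64> undef, i64 3, i32 0
    %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
    %shl = shl <vscale x 2 x i64> %a, %splat
    ret <vscale x 2 x i64> %shl
  }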

Diff Detail

Event Timeline

andwar created this revision. Mar 4 2020, 3:43 AM
Herald added a reviewer: efriedma.
Herald added a project: Restricted Project.
andwar marked an inline comment as done. Mar 4 2020, 3:52 AM
andwar added inline comments.
llvm/lib/Target/AArch64/AArch64ISelLowering.h
266

The new nodes (GLDNT1_INDEX and SSTNT1_INDEX) are only introduced to keep performGatherLoadCombine and performScatterStoreCombine relatively clean.

But maybe I shouldn't introduce them if they're always meant to be replaced with SPLAT_VECTOR + MUL + GLDNT1?

Just a quick note: this patch is formatted consistently with the files that it touches. Since these files diverge from the official clang-format rules, clang-format pre-merge checks failed.

If I'm following correctly, you don't actually create any SSTNT1_INDEX nodes? You're just using it as a placeholder in the call to performScatterStoreCombine? I guess that's not a big deal.

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
13

"mul"? Can we make a shift instead?

andwar marked an inline comment as done. Mar 5 2020, 5:34 AM

> If I'm following correctly, you don't actually create any SSTNT1_INDEX nodes? You're just using it as a placeholder in the call to performScatterStoreCombine? I guess that's not a big deal.

Correct. Same for GLDNT1_INDEX. I'm only using these nodes to uniquely identify scatters/gathers that:

  • are non-temporal
  • use the "scalar + vector of indices" addressing mode.

It's not great, but I think it's the least intrusive way of identifying these cases.

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
13

Sadly there are no patterns for lsl yet, so I'd have to add a call to @llvm.aarch64.sve.lsl for this to work. It's an option, but I'd prefer to leave a TODO instead (e.g. // TODO Replace MUL with SHL once patterns for lsl are added).
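
For context, a rough sketch of that alternative, pre-scaling the indices via the predicated SVE shift intrinsic instead of a plain IR mul or shl; the exact types are an assumption:

  declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

  ; Shifts each active lane of %idx left by the matching lane of %amt.
  define <vscale x 2 x i64> @scale_indices(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %idx, <vscale x 2 x i64> %amt) {
    %off = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %idx, <vscale x 2 x i64> %amt)
    ret <vscale x 2 x i64> %off
  }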

efriedma added inline comments. Mar 5 2020, 9:04 AM
llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
13

The lsl patterns are pretty simple; see https://reviews.llvm.org/D73602 . But sure, we can leave that for later.

andwar updated this revision to Diff 248699. Mar 6 2020, 4:39 AM
  • Replaced ISD::MUL from my original patch with ISD::SHL
  • Added patterns for ISD::SHL for scalable types
  • Extracted the scaling into a separate function
andwar marked 2 inline comments as done. Mar 6 2020, 5:17 AM
andwar added inline comments.
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
5267

This change is needed to accommodate scalable types.

llvm/test/CodeGen/AArch64/sve2-intrinsics-nt-gather-loads-64bit-scaled-offset.ll
13

Fair point, updated.

Btw, thanks for the link, but it seems unrelated. Did you have some other patch in mind?

andwar edited the summary of this revision. Mar 6 2020, 5:18 AM
andwar updated this revision to Diff 249299. Mar 10 2020, 3:35 AM

Fix formatting to appease Harbormaster.

Maybe you can avoid the need for SSTNT1_INDEX and GLDNT1_INDEX by refactoring performScatterStoreCombine/performGatherLoadCombine to take the parameters from the caller, rather than handling the special cases inside those functions.
I don't think that should hold up this patch, but it may be something to refactor afterwards.

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
3085

Can you also create some negative tests for this change?

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
12674

getScaledOffsetForLDNT1 is a bit of a misnomer for this function; how about getScaledOffsetForBitwidth()?

andwar updated this revision to Diff 249660. Mar 11 2020, 9:54 AM

Rename getScaledOffsetForLDNT1 to getScaledOffsetForBitwidth

andwar marked 2 inline comments as done. Mar 11 2020, 9:57 AM
andwar added inline comments.
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
3085

If that's OK, I'll do it in a separate patch.

sdesmalen accepted this revision. Mar 11 2020, 11:18 AM

LGTM

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
3085

Okay, that's fine with me. I guess this requires adding more patterns for shifts in order to test it, so it makes sense to do that in a separate patch.

This revision is now accepted and ready to land. Mar 11 2020, 11:18 AM
This revision was automatically updated to reflect the committed changes.