
[AArch64][SVE] Add intrinsics for gather loads with 64-bit offsets
ClosedPublic

Authored by andwar on Nov 21 2019, 7:59 AM.

Details

Summary

This patch adds the following intrinsics for gather loads with 64-bit offsets:

  • @llvm.aarch64.sve.ld1.gather (unscaled offset)
  • @llvm.aarch64.sve.ld1.gather.index (scaled offset)

These intrinsics map 1-1 to the following AArch64 instructions, respectively (examples shown for half-words):

  • ld1h { z0.d }, p0/z, [x0, z0.d]
  • ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]


Event Timeline

andwar created this revision.Nov 21 2019, 7:59 AM
Herald added a reviewer: efriedma. · View Herald Transcript
Herald added a project: Restricted Project. · View Herald Transcript
andwar marked 2 inline comments as done.Nov 21 2019, 8:09 AM
andwar added inline comments.
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
1147

I missed this, sorry! I will remove it in the next patch.

llvm/lib/Target/AArch64/SVEInstrFormats.td
5608

This Pseudo is not needed. I will remove it in the next patch.

Thanks for this patch @andwar! I think the patch could do with some cleanup, but most of it looks quite straightforward.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
617

Is this change supposed to be here?

775

Is this change supposed to be here?

836

nit: unrelated change.

2993

nit: unrelated change.

11793

Can we move this functionality into a separate function? e.g. something like MVT getPackedIntegerSVEType(MVT EltTy).
That should simplify this function quite a bit.

11831

Can this code not simply do a BITCAST to OutVT?

llvm/lib/Target/AArch64/AArch64ISelLowering.h
230

nit: unrelated change.

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
26

SVEAddrModeRegReg8 is not used anywhere, please remove.

1147

please remove.

llvm/lib/Target/AArch64/SVEInstrFormats.td
5294

nit: unrelated change.

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
11 ↗(On Diff #230460)

The check-lines for the sign/zero extend are not really necessary, but they will make it more obvious to see them removed in a patch that folds the sign/zero extension into the load itself.

andwar marked 15 inline comments as done.Nov 22 2019, 2:03 AM
andwar added inline comments.
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
617

Removed, thanks!

775

Good catch, thanks, removed!

11831

Good catch, thanks! Updated.

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
11 ↗(On Diff #230460)

Ack

andwar updated this revision to Diff 230614.Nov 22 2019, 2:07 AM
andwar marked 4 inline comments as done.

@sdesmalen Cheers for the quick review and your comments. I've updated the patch accordingly.

fpetrogalli added inline comments.
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
11930–11933

Nit: the prevailing style in this file seems to be a single invocation of the function shared by both N->getOpcode() cases, with the ISD node selection done inside the function.

static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode;
  switch (N->getOpcode()) {
  default:
    // <- this would guarantee that the function is not invoked on
    // something that it cannot handle yet?
    llvm_unreachable("unexpected opcode");
  case Intrinsic::aarch64_sve_ld1_gather:
    Opcode = AArch64ISD::GLD1;
    break;
  case ...
  }
  EVT RetVT = N->getValueType(0);
  assert(RetVT.isScalableVector() &&
         "Gather loads are only possible for SVE vectors");
  // ...
}


//...
    case Intrinsic::aarch64_sve_ld1_gather:
    case Intrinsic::aarch64_sve_ld1_gather_index:
      return performLD1GatherCombine(N, DAG);
// ...
llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
654

static constexpr unsigned should make sure that we don't run into duplicate variable definitions if the header gets included somewhere else (admittedly, an unlikely situation in this specific case).

sdesmalen added inline comments.Nov 22 2019, 10:57 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
11765

The packed vector type is not defined by the number of elements, but rather the element type, so should take MVT EltTy instead.

efriedma added inline comments.Nov 22 2019, 1:33 PM
llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
15 ↗(On Diff #230614)

This doesn't match the way the corresponding C intrinsics are defined in the ACLE spec. Are you intentionally diverging here?

andwar updated this revision to Diff 230902.EditedNov 25 2019, 7:41 AM
andwar marked 5 inline comments as done.

I've uploaded a new patch.

Thank you all for having a look! @sdesmalen, I refactored and renamed getPackedIntegerSVEType to getSVEContainerType. I'm hoping that the intent for that method is now clearer. What are your thoughts?

andwar added inline comments.Nov 25 2019, 7:42 AM
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
11930–11933

Good point! However, that would lead to 2 separate switch statements with similar cases (i.e. code duplication). In other words, either way it won't be ideal. I would like to keep the current implementation for now.

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
654

Good point, updated!

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
15 ↗(On Diff #230614)

Thank you for taking a look @efriedma! Yes, this is intentional. Well spotted!

If we used <nxv2i64> as the return type here then we wouldn't need the zext. However, we'd need some other way to differentiate between ld1b and ld1sb later, which would basically double the number of intrinsics. We felt that leaving the sign/zero extension here (to be folded using a simple DAGCombine) is a good compromise. I will be upstreaming that code shortly.

I should also point out that we have more intrinsics like this to upstream - this patch covers only 2 addressing modes.

efriedma added inline comments.Nov 25 2019, 12:10 PM
llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
15 ↗(On Diff #230614)

My only concern with this is that it means we're committing to a specific layout for these types: specifically, that converting from <vscale x 2 x i8> to <vscale x 2 x i64> using an ANY_EXTEND is a no-op. (Otherwise, we would be forced to generate some very nasty code to lower a load that isn't immediately extended.)

For other targets, we've found that doing legalization by promoting the integer type isn't the best approach. For example, on x86, we used to legalize <2 x i16> by promoting it to <2 x i64>. But that was changed to widen it to <8 x i16>, where the padding is all at the end, because the operations were generally cheaper.

Maybe we could implement some hybrid approach to legalization, though, that widens <vscale x 2 x i8> to <vscale x 16 x i8>, but interleaves the padding so the conversion to <vscale x 2 x i64> is just a bitcast.

sdesmalen added inline comments.Nov 26 2019, 7:07 AM
llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
15 ↗(On Diff #230614)

Using the unpacked layout for scalable vectors is a conscious decision for SVE. The most important reason for that is the ability to generate a predicate; there is no ptrue instruction to generate an all-true predicate for a packed <vscale x 8 x i16> vector that sets the bottom vscale x 8 lanes to true. That means we'd need to evaluate a whole compare expression to create the mask. Because a predicate must have a consistent meaning across all of its uses, we'd also need to add instructions to unpack the predicate in order for it to have the same meaning across operations that work on different element sizes. Other operations, like extracting the bottom/high half of a vector, would also become more expensive, since we can't use zip1/zip2 instructions for that any more.

Unlike other targets, SVE's full predication makes it much less likely that operating on unpacked vectors is more expensive, so this seemed like the right design choice.

It's worth pointing out that this requirement is specific to scalable vectors and does not necessarily translate to fixed-width vectors like <2 x i32> mapped to SVE, because ptrue has patterns to generate a mask for an explicit vector length (although that would still require multiple predicates if the loop uses different element sizes).

efriedma accepted this revision.Nov 26 2019, 12:49 PM

LGTM

llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-offset.ll
15 ↗(On Diff #230614)

Okay, that makes sense.

This revision is now accepted and ready to land.Nov 26 2019, 12:49 PM
andwar updated this revision to Diff 231497.Nov 29 2019, 2:11 AM

Added some NFCs before merging in:

  • Missing test case for a vector of doubles
  • Renamed the tests to better reflect the content
  • Added a TableGen class for the intrinsics (to reduce code duplication)
This revision was automatically updated to reflect the committed changes.