Diff 398900

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,790 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// Otherwise, return the output of the first CSEL.		// Otherwise, return the output of the first CSEL.
return CS1;		return CS1;
}		}

SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,		SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();		EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);		auto Idx = Op.getConstantOperandAPInt(2);
		int64_t IdxVal = Idx.getSExtValue();
		assert(Ty.isScalableVector() &&
		"Only expect scalable vectors for custom lowering of VECTOR_SPLICE");

		// We can use the splice instruction for certain index values where we are
		// able to efficiently generate the correct predicate. The index will be
		// inverted and used directly as the input to the ptrue instruction, i.e.
		// -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
		// splice predicate. However, we can only do this if we can guarantee that
		// there are enough elements in the vector, hence we check the index <= min
		// number of elements.
		Optional<unsigned> PredPattern;
		if (Ty.isScalableVector() && IdxVal < 0 &&
		(PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) !=
		None) {
		SDLoc DL(Op);

		// Create a predicate where all but the last -IdxVal elements are false.
		EVT PredVT = Ty.changeVectorElementType(MVT::i1);
		SDValue Pred = getPTrue(DAG, DL, PredVT, *PredPattern);
		Pred = DAG.getNode(ISD::VECTOR_REVERSE, DL, PredVT, Pred);

		// Now splice the two inputs together using the predicate.
		return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
		Op.getOperand(1));
		}

// This will select to an EXT instruction, which has a maximum immediate		// This will select to an EXT instruction, which has a maximum immediate
// value of 255, hence 2048-bits is the maximum value we can lower.		// value of 255, hence 2048-bits is the maximum value we can lower.
if (Idx.sge(-1) && Idx.slt(2048 / Ty.getVectorElementType().getSizeInBits()))		if (IdxVal >= 0 &&
		IdxVal < int64_t(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;		return Op;

return SDValue();		return SDValue();
}		}

SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,		SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: `IdxVal < 0 && abs(IdxVal) <= 8` sdesmalen: nit: `IdxVal < 0 && abs(IdxVal) <= 8`
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
		sdesmalenUnsubmitted Not Done Reply Inline Actions I don't think this check is necessary? sdesmalen: I don't think this check is necessary?
		david-armAuthorUnsubmitted Done Reply Inline Actions Unfortunately, it is necessary for correctness because the ptrue behaviour changes completely when you specify a fixed pattern that exceeds the number of elements. For example, if we do something like this: ptrue p0.d, vl4 and your vector length=128 bits, then ptrue actually returns an all-false predicate! So the maximum we can do safely without knowing vscale is: ptrue p0.d, vl2 david-arm: Unfortunately, it is necessary for correctness because the ptrue behaviour changes completely…
		sdesmalenUnsubmitted Not Done Reply Inline Actions If that happens it means that the original IR was incorrect, because it should never have generated a splice with offset `-4` for a `<vscale x 2 x eltty>` if vscale can be lower than 2. This can be verified in the IR Verifier with an extra check on `vscale_range`, but it shouldn't be part of the check here. sdesmalen: If that happens it means that the original IR was incorrect, because it should never have…
		david-armAuthorUnsubmitted Done Reply Inline Actions OK, in that case maybe I can just change this to an assert and I will add code to the verifier as part of this patch if it's not already there. david-arm: OK, in that case maybe I can just change this to an assert and I will add code to the verifier…
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();		ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);		SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);		SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);		SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);		SDValue FVal = Op.getOperand(3);
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: can you use abs(IdxVal) instead? (I personally find that slightly more readable) sdesmalen: nit: can you use abs(IdxVal) instead? (I personally find that slightly more readable)
SDLoc DL(Op);		SDLoc DL(Op);
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);		return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}		}

SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,		SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDValue CCVal = Op->getOperand(0);		SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);		SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);		SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);		SDLoc DL(Op);

EVT Ty = Op.getValueType();		EVT Ty = Op.getValueType();
if (Ty.isScalableVector()) {		if (Ty.isScalableVector()) {
SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);		SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());		MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
		sdesmalenUnsubmitted Not Done Reply Inline Actions Idx == -1 is already handled above? sdesmalen: Idx == -1 is already handled above?
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);		SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);		return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}		}

if (useSVEForFixedLengthVectorVT(Ty)) {		if (useSVEForFixedLengthVectorVT(Ty)) {
// FIXME: Ideally this would be the same as above using i1 types, however		// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so		// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.		// instead extend the predicate to a result type sized integer vector.
▲ Show 20 Lines • Show All 3,168 Lines • ▼ Show 20 Lines	if (InVT.isScalableVector()) {
return SDValue();		return SDValue();
}		}

if (Idx == 0 && isPackedVectorType(VT, DAG)) {		if (Idx == 0 && isPackedVectorType(VT, DAG)) {
// This will be matched by custom code during ISelDAGToDAG.		// This will be matched by custom code during ISelDAGToDAG.
if (Vec0.isUndef())		if (Vec0.isUndef())
return Op;		return Op;

unsigned int PredPattern =		Optional<unsigned> PredPattern =
getSVEPredPatternFromNumElements(InVT.getVectorNumElements());		getSVEPredPatternFromNumElements(InVT.getVectorNumElements());
auto PredTy = VT.changeVectorElementType(MVT::i1);		auto PredTy = VT.changeVectorElementType(MVT::i1);
SDValue PTrue = getPTrue(DAG, DL, PredTy, PredPattern);		SDValue PTrue = getPTrue(DAG, DL, PredTy, *PredPattern);
SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);		SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);		return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
}		}

return SDValue();		return SDValue();
}		}

static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {		static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
▲ Show 20 Lines • Show All 1,288 Lines • ▼ Show 20 Lines	bool AArch64TargetLowering::lowerInterleavedLoad(

// Holds sub-vectors extracted from the load intrinsic return values. The		// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will		// sub-vectors are associated with the shufflevector instructions they will
// replace.		// replace.
DenseMap<ShuffleVectorInst , SmallVector<Value , 4>> SubVecs;		DenseMap<ShuffleVectorInst , SmallVector<Value , 4>> SubVecs;

Value *PTrue = nullptr;		Value *PTrue = nullptr;
if (UseScalable) {		if (UseScalable) {
unsigned PgPattern =		Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(FVTy->getNumElements());		getSVEPredPatternFromNumElements(FVTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==		if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&		Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))		Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
PgPattern = AArch64SVEPredPattern::all;		PgPattern = AArch64SVEPredPattern::all;

auto *PTruePat =		auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), PgPattern);		ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},		PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});		{PTruePat});
}		}

for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {		for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {

// If we're generating more than one load, compute the base address of		// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.		// subsequent loads as an offset from the previous.
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines	if (UseScalable)
StNFunc = Intrinsic::getDeclaration(SI->getModule(),		StNFunc = Intrinsic::getDeclaration(SI->getModule(),
SVEStoreIntrs[Factor - 2], {STVTy});		SVEStoreIntrs[Factor - 2], {STVTy});
else		else
StNFunc = Intrinsic::getDeclaration(		StNFunc = Intrinsic::getDeclaration(
SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});		SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});

Value *PTrue = nullptr;		Value *PTrue = nullptr;
if (UseScalable) {		if (UseScalable) {
unsigned PgPattern =		Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(SubVecTy->getNumElements());		getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==		if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&		Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() ==		Subtarget->getMinSVEVectorSizeInBits() ==
DL.getTypeSizeInBits(SubVecTy))		DL.getTypeSizeInBits(SubVecTy))
PgPattern = AArch64SVEPredPattern::all;		PgPattern = AArch64SVEPredPattern::all;

auto *PTruePat =		auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), PgPattern);		ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},		PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});		{PTruePat});
}		}

for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {		for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {

SmallVector<Value *, 5> Ops;		SmallVector<Value *, 5> Ops;

▲ Show 20 Lines • Show All 6,227 Lines • ▼ Show 20 Lines

// Return a PTRUE with active lanes corresponding to the extent of VT.		// Return a PTRUE with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,		static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {		EVT VT) {
assert(VT.isFixedLengthVector() &&		assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&		DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");		"Expected legal fixed length vector!");

unsigned PgPattern =		Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(VT.getVectorNumElements());		getSVEPredPatternFromNumElements(VT.getVectorNumElements());
assert(PgPattern && "Unexpected element count for SVE predicate");		assert(PgPattern && "Unexpected element count for SVE predicate");

// For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use		// For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
// AArch64SVEPredPattern::all, which can enable the use of unpredicated		// AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.		// variants of instructions when available.
const auto &Subtarget =		const auto &Subtarget =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget());		static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
Show All 19 Lines	case MVT::f32:
MaskVT = MVT::nxv4i1;		MaskVT = MVT::nxv4i1;
break;		break;
case MVT::i64:		case MVT::i64:
case MVT::f64:		case MVT::f64:
MaskVT = MVT::nxv2i1;		MaskVT = MVT::nxv2i1;
break;		break;
}		}

return getPTrue(DAG, DL, MaskVT, PgPattern);		return getPTrue(DAG, DL, MaskVT, *PgPattern);
}		}

static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,		static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {		EVT VT) {
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&		assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal scalable vector!");		"Expected legal scalable vector!");
auto PredTy = VT.changeVectorElementType(MVT::i1);		auto PredTy = VT.changeVectorElementType(MVT::i1);
return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);		return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
▲ Show 20 Lines • Show All 983 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

Show First 20 Lines • Show All 446 Lines • ▼ Show 20 Lines

namespace AArch64SVEPredPattern {		namespace AArch64SVEPredPattern {
struct SVEPREDPAT {		struct SVEPREDPAT {
const char *Name;		const char *Name;
uint16_t Encoding;		uint16_t Encoding;
};		};
#define GET_SVEPREDPAT_DECL		#define GET_SVEPREDPAT_DECL
#include "AArch64GenSystemOperands.inc"		#include "AArch64GenSystemOperands.inc"
}		}
		sdesmalenUnsubmitted Done Reply Inline Actions Instead of making some arbitrary value 'invalid', maybe just return an `Optional<unsigned>` from `getSVEPredPatternFromNumElements` instead, because any value that's not between 0 and 31 inclusive is invalid. As an additional benefit, the result values of other uses of `getSVEPredPatternFromNumElements` in AArch64ISelLowering are checked for validity too. sdesmalen: Instead of making some arbitrary value 'invalid', maybe just return an `Optional<unsigned>`…

/// Return the number of active elements for VL1 to VL256 predicate pattern,		/// Return the number of active elements for VL1 to VL256 predicate pattern,
/// zero for all other patterns.		/// zero for all other patterns.
inline unsigned getNumElementsFromSVEPredPattern(unsigned Pattern) {		inline unsigned getNumElementsFromSVEPredPattern(unsigned Pattern) {
switch (Pattern) {		switch (Pattern) {
default:		default:
return 0;		return 0;
case AArch64SVEPredPattern::vl1:		case AArch64SVEPredPattern::vl1:
Show All 14 Lines	inline unsigned getNumElementsFromSVEPredPattern(unsigned Pattern) {
case AArch64SVEPredPattern::vl128:		case AArch64SVEPredPattern::vl128:
return 128;		return 128;
case AArch64SVEPredPattern::vl256:		case AArch64SVEPredPattern::vl256:
return 256;		return 256;
}		}
}		}

/// Return specific VL predicate pattern based on the number of elements.		/// Return specific VL predicate pattern based on the number of elements.
inline unsigned getSVEPredPatternFromNumElements(unsigned MinNumElts) {		inline Optional<unsigned>
		getSVEPredPatternFromNumElements(unsigned MinNumElts) {
switch (MinNumElts) {		switch (MinNumElts) {
default:		default:
llvm_unreachable("unexpected element count for SVE predicate");		return None;
case 1:		case 1:
return AArch64SVEPredPattern::vl1;
case 2:		case 2:
return AArch64SVEPredPattern::vl2;		case 3:
case 4:		case 4:
return AArch64SVEPredPattern::vl4;		case 5:
		case 6:
		case 7:
case 8:		case 8:
return AArch64SVEPredPattern::vl8;		return MinNumElts;
case 16:		case 16:
return AArch64SVEPredPattern::vl16;		return AArch64SVEPredPattern::vl16;
case 32:		case 32:
return AArch64SVEPredPattern::vl32;		return AArch64SVEPredPattern::vl32;
case 64:		case 64:
return AArch64SVEPredPattern::vl64;		return AArch64SVEPredPattern::vl64;
case 128:		case 128:
return AArch64SVEPredPattern::vl128;		return AArch64SVEPredPattern::vl128;
▲ Show 20 Lines • Show All 258 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

	Show First 20 Lines • Show All 75 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 31)			%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 31)
	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {			define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv2f16_neg_idx:			; CHECK-LABEL: splice_nxv2f16_neg_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d, vl1
	; CHECK-NEXT: lastb d0, p0, z0.d			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: insr z1.d, d0			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)			%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
	ret <vscale x 2 x half> %res			ret <vscale x 2 x half> %res
	}			}

	define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {			define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv2f16_neg2_idx:			; CHECK-LABEL: splice_nxv2f16_neg2_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov x9, #-8
	; CHECK-NEXT: ptrue p0.h
	; CHECK-NEXT: st1h { z0.h }, p0, [sp]
	; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -2)			%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -2)
	ret <vscale x 2 x half> %res			ret <vscale x 2 x half> %res
	}			}

	define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {			define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv2f16_first_idx:			; CHECK-LABEL: splice_nxv2f16_first_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	Show All 10 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 31)			%res = call <vscale x 2 x half> @llvm.experimental.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 31)
	ret <vscale x 2 x half> %res			ret <vscale x 2 x half> %res
	}			}

	define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {			define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv4f16_neg_idx:			; CHECK-LABEL: splice_nxv4f16_neg_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s, vl1
	; CHECK-NEXT: lastb s0, p0, z0.s			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: insr z1.s, s0			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)			%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
	ret <vscale x 4 x half> %res			ret <vscale x 4 x half> %res
	}			}

	define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {			define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv4f16_neg3_idx:			; CHECK-LABEL: splice_nxv4f16_neg3_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.s, vl3
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: mov x9, #-6
	; CHECK-NEXT: ptrue p0.h
	; CHECK-NEXT: st1h { z0.h }, p0, [sp]
	; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -3)			%res = call <vscale x 4 x half> @llvm.experimental.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -3)
	ret <vscale x 4 x half> %res			ret <vscale x 4 x half> %res
	}			}

	define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {			define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv4f16_first_idx:			; CHECK-LABEL: splice_nxv4f16_first_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	Show All 28 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 127)			%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 127)
	ret <vscale x 8 x half> %res			ret <vscale x 8 x half> %res
	}			}

	define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {			define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
	; CHECK-LABEL: splice_nxv2f32_neg_idx:			; CHECK-LABEL: splice_nxv2f32_neg_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d, vl1
	; CHECK-NEXT: lastb d0, p0, z0.d			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: insr z1.d, d0			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -1)			%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -1)
	ret <vscale x 2 x float> %res			ret <vscale x 2 x float> %res
	}			}

	define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {			define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
	; CHECK-LABEL: splice_nxv2f32_neg2_idx:			; CHECK-LABEL: splice_nxv2f32_neg2_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov x9, #-4
	; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: st1w { z0.s }, p0, [sp]
	; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -2)			%res = call <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -2)
	ret <vscale x 2 x float> %res			ret <vscale x 2 x float> %res
	}			}

	define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {			define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
	; CHECK-LABEL: splice_nxv2f32_first_idx:			; CHECK-LABEL: splice_nxv2f32_first_idx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines

	;			;
	; VECTOR_SPLICE (trailing elements)			; VECTOR_SPLICE (trailing elements)
	;			;

	define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {			define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
	; CHECK-LABEL: splice_nxv16i8:			; CHECK-LABEL: splice_nxv16i8:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.b, vl16
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.b, p0.b
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
	; CHECK-NEXT: mov x9, #-16
	; CHECK-NEXT: ptrue p0.b
	; CHECK-NEXT: st1b { z0.b }, p0, [sp]
	; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -16)			%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -16)
	ret <vscale x 16 x i8> %res			ret <vscale x 16 x i8> %res
	}			}

				define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {
				; CHECK-LABEL: splice_nxv16i8_neg32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl32
				; CHECK-NEXT: rev p0.b, p0.b
				; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
				; CHECK-NEXT: ret
				%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -32)
				ret <vscale x 16 x i8> %res
				}

				define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #3 {
				; CHECK-LABEL: splice_nxv16i8_neg64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl64
				; CHECK-NEXT: rev p0.b, p0.b
				; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
				; CHECK-NEXT: ret
				%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -64)
				ret <vscale x 16 x i8> %res
				}

				define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #4 {
				; CHECK-LABEL: splice_nxv16i8_neg128:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl128
				; CHECK-NEXT: rev p0.b, p0.b
				; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
				; CHECK-NEXT: ret
				%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -128)
				ret <vscale x 16 x i8> %res
				}

				define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #1 {
				; CHECK-LABEL: splice_nxv16i8_neg256:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: rev p0.b, p0.b
				; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
				; CHECK-NEXT: ret
				%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -256)
				ret <vscale x 16 x i8> %res
				}

	define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {			define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
	; CHECK-LABEL: splice_nxv16i8_1:			; CHECK-LABEL: splice_nxv16i8_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.b			; CHECK-NEXT: ptrue p0.b, vl1
	; CHECK-NEXT: lastb b0, p0, z0.b			; CHECK-NEXT: rev p0.b, p0.b
	; CHECK-NEXT: insr z1.b, b0			; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -1)			%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -1)
	ret <vscale x 16 x i8> %res			ret <vscale x 16 x i8> %res
	}			}

	define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {			define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {
	; CHECK-LABEL: splice_nxv16i8_neg17:			; CHECK-LABEL: splice_nxv16i8_neg17:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	Show All 15 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -17)			%res = call <vscale x 16 x i8> @llvm.experimental.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -17)
	ret <vscale x 16 x i8> %res			ret <vscale x 16 x i8> %res
	}			}

	define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {			define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
	; CHECK-LABEL: splice_nxv8i16:			; CHECK-LABEL: splice_nxv8i16:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.h, vl8
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.h, p0.h
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
	; CHECK-NEXT: mov x9, #-8
	; CHECK-NEXT: ptrue p0.h
	; CHECK-NEXT: st1h { z0.h }, p0, [sp]
	; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -8)			%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -8)
	ret <vscale x 8 x i16> %res			ret <vscale x 8 x i16> %res
	}			}

	define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {			define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
	; CHECK-LABEL: splice_nxv8i16_1:			; CHECK-LABEL: splice_nxv8i16_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.h			; CHECK-NEXT: ptrue p0.h, vl1
	; CHECK-NEXT: lastb h0, p0, z0.h			; CHECK-NEXT: rev p0.h, p0.h
	; CHECK-NEXT: insr z1.h, h0			; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -1)			%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -1)
	ret <vscale x 8 x i16> %res			ret <vscale x 8 x i16> %res
	}			}

	define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #2 {			define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #2 {
	; CHECK-LABEL: splice_nxv8i16_neg9:			; CHECK-LABEL: splice_nxv8i16_neg9:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	Show All 15 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -9)			%res = call <vscale x 8 x i16> @llvm.experimental.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -9)
	ret <vscale x 8 x i16> %res			ret <vscale x 8 x i16> %res
	}			}

	define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {			define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
	; CHECK-LABEL: splice_nxv4i32:			; CHECK-LABEL: splice_nxv4i32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.s, vl4
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: mov x9, #-4
	; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: st1w { z0.s }, p0, [sp]
	; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -4)			%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -4)
	ret <vscale x 4 x i32> %res			ret <vscale x 4 x i32> %res
	}			}

	define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {			define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
	; CHECK-LABEL: splice_nxv4i32_1:			; CHECK-LABEL: splice_nxv4i32_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s, vl1
	; CHECK-NEXT: lastb s0, p0, z0.s			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: insr z1.s, s0			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)			%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)
	ret <vscale x 4 x i32> %res			ret <vscale x 4 x i32> %res
	}			}

	define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #2 {			define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #2 {
	; CHECK-LABEL: splice_nxv4i32_neg5:			; CHECK-LABEL: splice_nxv4i32_neg5:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.s, vl5
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: rdvl x9, #1
	; CHECK-NEXT: cmp x9, #20
	; CHECK-NEXT: mov w10, #20
	; CHECK-NEXT: csel x9, x9, x10, lo
	; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: st1w { z0.s }, p0, [sp]
	; CHECK-NEXT: sub x8, x8, x9
	; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -5)			%res = call <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -5)
	ret <vscale x 4 x i32> %res			ret <vscale x 4 x i32> %res
	}			}

	define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {			define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
	; CHECK-LABEL: splice_nxv2i64:			; CHECK-LABEL: splice_nxv2i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov x9, #-2
	; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: st1d { z0.d }, p0, [sp]
	; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -2)			%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -2)
	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {			define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
	; CHECK-LABEL: splice_nxv2i64_1:			; CHECK-LABEL: splice_nxv2i64_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d, vl1
	; CHECK-NEXT: lastb d0, p0, z0.d			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: insr z1.d, d0			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -1)			%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -1)
	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #2 {			define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #2 {
	; CHECK-LABEL: splice_nxv2i64_neg3:			; CHECK-LABEL: splice_nxv2i64_neg3:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl3
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: rdvl x9, #1
	; CHECK-NEXT: cmp x9, #24
	; CHECK-NEXT: mov w10, #24
	; CHECK-NEXT: csel x9, x9, x10, lo
	; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: st1d { z0.d }, p0, [sp]
	; CHECK-NEXT: sub x8, x8, x9
	; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -3)			%res = call <vscale x 2 x i64> @llvm.experimental.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -3)
	ret <vscale x 2 x i64> %res			ret <vscale x 2 x i64> %res
	}			}

	define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {			define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv8f16:			; CHECK-LABEL: splice_nxv8f16:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.h, vl8
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.h, p0.h
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
	; CHECK-NEXT: mov x9, #-8
	; CHECK-NEXT: ptrue p0.h
	; CHECK-NEXT: st1h { z0.h }, p0, [sp]
	; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -8)			%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -8)
	ret <vscale x 8 x half> %res			ret <vscale x 8 x half> %res
	}			}

	define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {			define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
	; CHECK-LABEL: splice_nxv8f16_1:			; CHECK-LABEL: splice_nxv8f16_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.h			; CHECK-NEXT: ptrue p0.h, vl1
	; CHECK-NEXT: lastb h0, p0, z0.h			; CHECK-NEXT: rev p0.h, p0.h
	; CHECK-NEXT: insr z1.h, h0			; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)			%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
	ret <vscale x 8 x half> %res			ret <vscale x 8 x half> %res
	}			}

	define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #2 {			define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #2 {
	; CHECK-LABEL: splice_nxv8f16_neg9:			; CHECK-LABEL: splice_nxv8f16_neg9:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	Show All 15 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -9)			%res = call <vscale x 8 x half> @llvm.experimental.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -9)
	ret <vscale x 8 x half> %res			ret <vscale x 8 x half> %res
	}			}

	define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {			define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
	; CHECK-LABEL: splice_nxv4f32:			; CHECK-LABEL: splice_nxv4f32:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.s, vl4
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: mov x9, #-4
	; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: st1w { z0.s }, p0, [sp]
	; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -4)			%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -4)
	ret <vscale x 4 x float> %res			ret <vscale x 4 x float> %res
	}			}

	define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {			define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
	; CHECK-LABEL: splice_nxv4f32_1:			; CHECK-LABEL: splice_nxv4f32_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.s			; CHECK-NEXT: ptrue p0.s, vl1
	; CHECK-NEXT: lastb s0, p0, z0.s			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: insr z1.s, s0			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -1)			%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -1)
	ret <vscale x 4 x float> %res			ret <vscale x 4 x float> %res
	}			}

	define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #2 {			define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #2 {
	; CHECK-LABEL: splice_nxv4f32_neg5:			; CHECK-LABEL: splice_nxv4f32_neg5:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.s, vl5
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.s, p0.s
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
	; CHECK-NEXT: rdvl x9, #1
	; CHECK-NEXT: cmp x9, #20
	; CHECK-NEXT: mov w10, #20
	; CHECK-NEXT: csel x9, x9, x10, lo
	; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: st1w { z0.s }, p0, [sp]
	; CHECK-NEXT: sub x8, x8, x9
	; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -5)			%res = call <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -5)
	ret <vscale x 4 x float> %res			ret <vscale x 4 x float> %res
	}			}

	define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {			define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
	; CHECK-LABEL: splice_nxv2f64:			; CHECK-LABEL: splice_nxv2f64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov x9, #-2
	; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: st1d { z0.d }, p0, [sp]
	; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -2)			%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -2)
	ret <vscale x 2 x double> %res			ret <vscale x 2 x double> %res
	}			}

	define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {			define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
	; CHECK-LABEL: splice_nxv2f64_1:			; CHECK-LABEL: splice_nxv2f64_1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p0.d			; CHECK-NEXT: ptrue p0.d, vl1
	; CHECK-NEXT: lastb d0, p0, z0.d			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: insr z1.d, d0			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov z0.d, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -1)			%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -1)
	ret <vscale x 2 x double> %res			ret <vscale x 2 x double> %res
	}			}

	define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #2 {			define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #2 {
	; CHECK-LABEL: splice_nxv2f64_neg3:			; CHECK-LABEL: splice_nxv2f64_neg3:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl3
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: rdvl x9, #1
	; CHECK-NEXT: cmp x9, #24
	; CHECK-NEXT: mov w10, #24
	; CHECK-NEXT: csel x9, x9, x10, lo
	; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: st1d { z0.d }, p0, [sp]
	; CHECK-NEXT: sub x8, x8, x9
	; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)			%res = call <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)
	ret <vscale x 2 x double> %res			ret <vscale x 2 x double> %res
	}			}

	; Ensure predicate based splice is promoted to use ZPRs.			; Ensure predicate based splice is promoted to use ZPRs.
	define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {			define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
	; CHECK-LABEL: splice_nxv2i1:			; CHECK-LABEL: splice_nxv2i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p2.d			; CHECK-NEXT: ptrue p2.d, vl1
	; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1			; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
	; CHECK-NEXT: lastb d0, p2, z0.d			; CHECK-NEXT: rev p2.d, p2.d
	; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1			; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
	; CHECK-NEXT: insr z1.d, d0			; CHECK-NEXT: splice z1.d, p2, z1.d, z0.d
				; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: and z1.d, z1.d, #0x1			; CHECK-NEXT: and z1.d, z1.d, #0x1
	; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0			; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)			%res = call <vscale x 2 x i1> @llvm.experimental.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)
	ret <vscale x 2 x i1> %res			ret <vscale x 2 x i1> %res
	}			}

	; Ensure predicate based splice is promoted to use ZPRs.			; Ensure predicate based splice is promoted to use ZPRs.
	define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {			define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
	; CHECK-LABEL: splice_nxv4i1:			; CHECK-LABEL: splice_nxv4i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p2.s			; CHECK-NEXT: ptrue p2.s, vl1
	; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1			; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
	; CHECK-NEXT: lastb s0, p2, z0.s			; CHECK-NEXT: rev p2.s, p2.s
	; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1			; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
	; CHECK-NEXT: insr z1.s, s0			; CHECK-NEXT: splice z1.s, p2, z1.s, z0.s
				; CHECK-NEXT: ptrue p0.s
	; CHECK-NEXT: and z1.s, z1.s, #0x1			; CHECK-NEXT: and z1.s, z1.s, #0x1
	; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0			; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)			%res = call <vscale x 4 x i1> @llvm.experimental.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)
	ret <vscale x 4 x i1> %res			ret <vscale x 4 x i1> %res
	}			}

	; Ensure predicate based splice is promoted to use ZPRs.			; Ensure predicate based splice is promoted to use ZPRs.
	define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {			define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
	; CHECK-LABEL: splice_nxv8i1:			; CHECK-LABEL: splice_nxv8i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p2.h			; CHECK-NEXT: ptrue p2.h, vl1
	; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1			; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
	; CHECK-NEXT: lastb h0, p2, z0.h			; CHECK-NEXT: rev p2.h, p2.h
	; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1			; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
	; CHECK-NEXT: insr z1.h, h0			; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h
				; CHECK-NEXT: ptrue p0.h
	; CHECK-NEXT: and z1.h, z1.h, #0x1			; CHECK-NEXT: and z1.h, z1.h, #0x1
	; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0			; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)			%res = call <vscale x 8 x i1> @llvm.experimental.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)
	ret <vscale x 8 x i1> %res			ret <vscale x 8 x i1> %res
	}			}

	; Ensure predicate based splice is promoted to use ZPRs.			; Ensure predicate based splice is promoted to use ZPRs.
	define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {			define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
	; CHECK-LABEL: splice_nxv16i1:			; CHECK-LABEL: splice_nxv16i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: ptrue p2.b			; CHECK-NEXT: ptrue p2.b, vl1
	; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1			; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
	; CHECK-NEXT: lastb b0, p2, z0.b			; CHECK-NEXT: rev p2.b, p2.b
	; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1			; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
	; CHECK-NEXT: insr z1.b, b0			; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b
				; CHECK-NEXT: ptrue p0.b
	; CHECK-NEXT: and z1.b, z1.b, #0x1			; CHECK-NEXT: and z1.b, z1.b, #0x1
	; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0			; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 -1)			%res = call <vscale x 16 x i1> @llvm.experimental.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 -1)
	ret <vscale x 16 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	; Verify promote type legalisation works as expected.			; Verify promote type legalisation works as expected.
	define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {			define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
	; CHECK-LABEL: splice_nxv2i8:			; CHECK-LABEL: splice_nxv2i8:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill			; CHECK-NEXT: ptrue p0.d, vl2
	; CHECK-NEXT: addvl sp, sp, #-2			; CHECK-NEXT: rev p0.d, p0.d
	; CHECK-NEXT: mov x8, sp			; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
	; CHECK-NEXT: mov x9, #-2
	; CHECK-NEXT: ptrue p0.d
	; CHECK-NEXT: st1d { z0.d }, p0, [sp]
	; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
	; CHECK-NEXT: addvl x8, x8, #1
	; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
	; CHECK-NEXT: addvl sp, sp, #2
	; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 -2)			%res = call <vscale x 2 x i8> @llvm.experimental.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 -2)
	ret <vscale x 2 x i8> %res			ret <vscale x 2 x i8> %res
	}			}

	; Verify splitvec type legalisation works as expected.			; Verify splitvec type legalisation works as expected.
	define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {			define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
	; CHECK-LABEL: splice_nxv8i32:			; CHECK-LABEL: splice_nxv8i32:
	▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines
	declare <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)			declare <vscale x 2 x float> @llvm.experimental.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
	declare <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)			declare <vscale x 4 x float> @llvm.experimental.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
	declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)			declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
	declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)			declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

	attributes #0 = { nounwind "target-features"="+sve" }			attributes #0 = { nounwind "target-features"="+sve" }
	attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) }			attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) }
	attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) }			attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) }
				attributes #3 = { nounwind "target-features"="+sve" vscale_range(4,16) }
				attributes #4 = { nounwind "target-features"="+sve" vscale_range(8,16) }

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Use splice instruction when lowering VECTOR_SPLICE
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 398900

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Use splice instruction when lowering VECTOR_SPLICEClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 398900

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

[SVE][CodeGen] Use splice instruction when lowering VECTOR_SPLICE
ClosedPublic