Diff 266762

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 17,419 Lines • ▼ Show 20 Lines	if (InOp.getValueType() != ScalarVT) {
assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());		assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);		return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
}		}
return InOp;		return InOp;
}		}

// extract_vector_elt of out-of-bounds element -> UNDEF		// extract_vector_elt of out-of-bounds element -> UNDEF
auto *IndexC = dyn_cast<ConstantSDNode>(Index);		auto *IndexC = dyn_cast<ConstantSDNode>(Index);
unsigned NumElts = VecVT.getVectorNumElements();		if (IndexC && VecVT.isFixedLengthVector() &&
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();		IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
if (IndexC && IndexC->getAPIntValue().uge(NumElts))
return DAG.getUNDEF(ScalarVT);		return DAG.getUNDEF(ScalarVT);

// extract_vector_elt (build_vector x, y), 1 -> y		// extract_vector_elt (build_vector x, y), 1 -> y
if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&		if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) \|\|
		VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
		efriedmaUnsubmitted Done Reply Inline Actions Should this be `if ((VecOp.getOpcode() == ISD::SPLAT_VECTOR || (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR)) && [...])`? efriedma: Should this be `if ((VecOp.getOpcode() == ISD::SPLAT_VECTOR || (IndexC && VecOp.getOpcode() ==…
TLI.isTypeLegal(VecVT) &&		TLI.isTypeLegal(VecVT) &&
(VecOp.hasOneUse() \|\| TLI.aggressivelyPreferBuildVectorSources(VecVT))) {		(VecOp.hasOneUse() \|\| TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());		assert((VecOp.getOpcode() != ISD::BUILD_VECTOR \|\|
		VecVT.isFixedLengthVector()) &&
		"BUILD_VECTOR used for scalable vectors");
		unsigned IndexVal =
		VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
		SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();		EVT InEltVT = Elt.getValueType();

// Sometimes build_vector's scalar input types do not match result type.		// Sometimes build_vector's scalar input types do not match result type.
if (ScalarVT == InEltVT)		if (ScalarVT == InEltVT)
return Elt;		return Elt;

// TODO: It may be useful to truncate if free if the build_vector implicitly		// TODO: It may be useful to truncate if free if the build_vector implicitly
// converts.		// converts.
}		}

		if (VecVT.isScalableVector())
		return SDValue();

		// All the code from this point onwards assumes fixed width vectors, but it's
		// possible that some of the combinations could be made to work for scalable
		// vectors too.
		unsigned NumElts = VecVT.getVectorNumElements();
		unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();

// TODO: These transforms should not require the 'hasOneUse' restriction, but		// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a		// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.		// mess of scalar and vector code if we reduce only part of the DAG to scalar.
if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&		if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
VecOp.hasOneUse()) {		VecOp.hasOneUse()) {
// The vector index of the LSBs of the source depend on the endian-ness.		// The vector index of the LSBs of the source depend on the endian-ness.
bool IsLE = DAG.getDataLayout().isLittleEndian();		bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned ExtractIndex = IndexC->getZExtValue();		unsigned ExtractIndex = IndexC->getZExtValue();
▲ Show 20 Lines • Show All 4,343 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 32 Lines
#include "llvm/CodeGen/MachineConstantPool.h"		#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"		#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"		#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"		#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"		#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"		#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"		#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"		#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
		#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"		#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"		#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"		#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"		#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"		#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"		#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"		#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"		#include "llvm/IR/DebugInfoMetadata.h"
▲ Show 20 Lines • Show All 5,307 Lines • ▼ Show 20 Lines	case ISD::EXTRACT_VECTOR_ELT:
assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&		assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() &&
"The result of EXTRACT_VECTOR_ELT must be at least as wide as the \		"The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
element type of the vector.");		element type of the vector.");

// Extract from an undefined value or using an undefined index is undefined.		// Extract from an undefined value or using an undefined index is undefined.
if (N1.isUndef() \|\| N2.isUndef())		if (N1.isUndef() \|\| N2.isUndef())
return getUNDEF(VT);		return getUNDEF(VT);

// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF		// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length
if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))		// vectors. For scalable vectors we will provide appropriate support for
		// dealing with arbitrary indices.
		if (N2C && N1.getValueType().isFixedLengthVector() &&
		N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))
return getUNDEF(VT);		return getUNDEF(VT);

// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is		// EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
// expanding copies of large vectors from registers.		// expanding copies of large vectors from registers. This only works for
if (N2C &&		// fixed length vectors, since we need to know the exact number of
N1.getOpcode() == ISD::CONCAT_VECTORS &&		// elements.
N1.getNumOperands() > 0) {		if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() &&
		N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) {
		efriedmaUnsubmitted Done Reply Inline Actions Please note why this doesn't work for scalable vectors. efriedma: Please note why this doesn't work for scalable vectors.
unsigned Factor =		unsigned Factor =
N1.getOperand(0).getValueType().getVectorNumElements();		N1.getOperand(0).getValueType().getVectorNumElements();
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,		return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
N1.getOperand(N2C->getZExtValue() / Factor),		N1.getOperand(N2C->getZExtValue() / Factor),
getVectorIdxConstant(N2C->getZExtValue() % Factor, DL));		getVectorIdxConstant(N2C->getZExtValue() % Factor, DL));
}		}

// EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is		// EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while
// expanding large vector constants.		// lowering is expanding large vector constants.
if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {		if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR \|\|
SDValue Elt = N1.getOperand(N2C->getZExtValue());		N1.getOpcode() == ISD::SPLAT_VECTOR)) {
		assert((N1.getOpcode() != ISD::BUILD_VECTOR \|\|
		N1.getValueType().isFixedLengthVector()) &&
		"BUILD_VECTOR used for scalable vectors");
		unsigned Index =
		N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0;
		SDValue Elt = N1.getOperand(Index);

if (VT != Elt.getValueType())		if (VT != Elt.getValueType())
// If the vector element type is not legal, the BUILD_VECTOR operands		// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated, and the result implicitly		// are promoted and implicitly truncated, and the result implicitly
// extended. Make that explicit here.		// extended. Make that explicit here.
Elt = getAnyExtOrTrunc(Elt, DL, VT);		Elt = getAnyExtOrTrunc(Elt, DL, VT);

return Elt;		return Elt;
Show All 17 Lines	if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
}		}

return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);		return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
}		}
}		}

// EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed		// EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
// when vector types are scalarized and v1iX is legal.		// when vector types are scalarized and v1iX is legal.
// vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx)		// vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx).
		// Here we are completely ignoring the extract element index (N2),
		// which is fine for fixed width vectors, since any index other than 0
		// is undefined anyway. However, this cannot be ignored for scalable
		efriedmaUnsubmitted Not Done Reply Inline Actions This comment doesn't seem quite right. For fixed-width vectors, we only do this for vectors with exactly one element, but that's not because it's semantically incorrect for vectors with more than one element. It's easy to convert indexing on a subvector to indexing on the original vector. The issue is just that it would make legalization more complex: we don't want to do the transform for vectors with more than one element without a profitability check. efriedma: This comment doesn't seem quite right. For fixed-width vectors, we only do this for vectors…
		david-armAuthorUnsubmitted Done Reply Inline Actions OK fair enough. For the extract_vector_elt we're completely ignoring the index, which is fine for fixed width vectors, but not fine for scalable vectors, since we'd then need to generate code taking the index into account, which isn't worth it here. If the index > 0 for fixed width vectors the result is undefined anyway so we can simply return anything. I'll try to update the comment to indicate this. david-arm: OK fair enough. For the extract_vector_elt we're completely ignoring the index, which is fine…
		// vectors - in theory we could support this, but we don't want to do this
		// without a profitability check.
if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&		if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
		N1.getValueType().isFixedLengthVector() &&
		efriedmaUnsubmitted Done Reply Inline Actions Please note why this doesn't work for scalable vectors. efriedma: Please note why this doesn't work for scalable vectors.
N1.getValueType().getVectorNumElements() == 1) {		N1.getValueType().getVectorNumElements() == 1) {
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),		return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
N1.getOperand(1));		N1.getOperand(1));
}		}
break;		break;
case ISD::EXTRACT_ELEMENT:		case ISD::EXTRACT_ELEMENT:
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");		assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
assert(!N1.getValueType().isVector() && !VT.isVector() &&		assert(!N1.getValueType().isVector() && !VT.isVector() &&
▲ Show 20 Lines • Show All 4,415 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 1,845 Lines • ▼ Show 20 Lines	def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
(DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),		(DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
$src)>;		$src)>;
def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)),		def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)),
(CPY_ZPmV_D ZPR:$vec,		(CPY_ZPmV_D ZPR:$vec,
(CMPEQ_PPzZZ_D (PTRUE_D 31),		(CMPEQ_PPzZZ_D (PTRUE_D 31),
(INDEX_II_D 0, 1),		(INDEX_II_D 0, 1),
(DUP_ZR_D $index)),		(DUP_ZR_D $index)),
$src)>;		$src)>;

		// Extract element from vector with immediate index
		def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
		def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), ssub)>;
		def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
		def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
		def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
		def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
		def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
		(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;

		// Extract element from vector with scalar index
		def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), GPR64:$index)),
		(LASTB_RPZ_B (WHILELS_PXX_B XZR, GPR64:$index),
		ZPR:$vec)>;
		def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), GPR64:$index)),
		efriedmaUnsubmitted Done Reply Inline Actions Maybe better to use `lastb(whilels(0, idx), vec)` or something like that? efriedma: Maybe better to use `lastb(whilels(0, idx), vec)` or something like that?
		(LASTB_RPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
		ZPR:$vec)>;
		def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), GPR64:$index)),
		(LASTB_RPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
		ZPR:$vec)>;
		def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), GPR64:$index)),
		(LASTB_RPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
		ZPR:$vec)>;

		def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), GPR64:$index)),
		(LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index),
		ZPR:$vec)>;
		def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
		(LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index),
		ZPR:$vec)>;
		def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), GPR64:$index)),
		(LASTB_VPZ_D (WHILELS_PXX_D XZR, GPR64:$index),
		ZPR:$vec)>;
}		}

let Predicates = [HasSVE, HasMatMulInt8] in {		let Predicates = [HasSVE, HasMatMulInt8] in {
defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;		defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;		defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;		defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;		defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;		defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
▲ Show 20 Lines • Show All 446 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-extract-element.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s

				define i8 @test_lane0_16xi8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: test_lane0_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z0.b, b0
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 16 x i8> %a, i32 0
				ret i8 %b
				}

				define i16 @test_lane0_8xi16(<vscale x 8 x i16> %a) {
				; CHECK-LABEL: test_lane0_8xi16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z0.h, h0
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 8 x i16> %a, i32 0
				ret i16 %b
				}

				define i32 @test_lane0_4xi32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: test_lane0_4xi32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z0.s, s0
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 4 x i32> %a, i32 0
				ret i32 %b
				}

				define i64 @test_lane0_2xi64(<vscale x 2 x i64> %a) {
				; CHECK-LABEL: test_lane0_2xi64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z0.d, d0
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 2 x i64> %a, i32 0
				ret i64 %b
				}

				define double @test_lane0_2xf64(<vscale x 2 x double> %a) {
				; CHECK-LABEL: test_lane0_2xf64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 2 x double> %a, i32 0
				ret double %b
				}

				define float @test_lane0_4xf32(<vscale x 4 x float> %a) {
				; CHECK-LABEL: test_lane0_4xf32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 4 x float> %a, i32 0
				ret float %b
				}

				define half @test_lane0_8xf16(<vscale x 8 x half> %a) {
				; CHECK-LABEL: test_lane0_8xf16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 8 x half> %a, i32 0
				ret half %b
				}

				define i8 @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.b, xzr, x8
				; CHECK-NEXT: lastb w0, p0, z0.b
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 16 x i8> %a, i32 %x
				ret i8 %b
				}

				define i16 @test_lanex_8xi16(<vscale x 8 x i16> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_8xi16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.h, xzr, x8
				; CHECK-NEXT: lastb w0, p0, z0.h
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 8 x i16> %a, i32 %x
				ret i16 %b
				}

				define i32 @test_lanex_4xi32(<vscale x 4 x i32> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_4xi32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.s, xzr, x8
				; CHECK-NEXT: lastb w0, p0, z0.s
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 4 x i32> %a, i32 %x
				ret i32 %b
				}

				define i64 @test_lanex_2xi64(<vscale x 2 x i64> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_2xi64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.d, xzr, x8
				; CHECK-NEXT: lastb x0, p0, z0.d
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 2 x i64> %a, i32 %x
				ret i64 %b
				}

				define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_2xf64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.d, xzr, x8
				; CHECK-NEXT: lastb d0, p0, z0.d
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 2 x double> %a, i32 %x
				ret double %b
				}

				define float @test_lanex_4xf32(<vscale x 4 x float> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_4xf32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.s, xzr, x8
				; CHECK-NEXT: lastb s0, p0, z0.s
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 4 x float> %a, i32 %x
				ret float %b
				}

				define half @test_lanex_8xf16(<vscale x 8 x half> %a, i32 %x) {
				; CHECK-LABEL: test_lanex_8xf16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
				; CHECK-NEXT: sxtw x8, w0
				; CHECK-NEXT: whilels p0.h, xzr, x8
				; CHECK-NEXT: lastb h0, p0, z0.h
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 8 x half> %a, i32 %x
				ret half %b
				}

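				; Deliberately choose an index that is out-of-bounds for the minimum
				; (vscale = 1) vector length. The real length is not known at compile
				; time, so the extract is still lowered instead of being folded to undef.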
				define i8 @test_lane64_16xi8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: test_lane64_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #64
				; CHECK-NEXT: whilels p0.b, xzr, x8
				; CHECK-NEXT: lastb w0, p0, z0.b
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 16 x i8> %a, i32 64
				ret i8 %b
				}

				define double @test_lane9_2xf64(<vscale x 2 x double> %a) {
				; CHECK-LABEL: test_lane9_2xf64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilels p0.d, xzr, x8
				; CHECK-NEXT: lastb d0, p0, z0.d
				efriedmaUnsubmitted Done Reply Inline Actions This doesn't assemble. (I assume you meant to refer to xzr?) efriedma: This doesn't assemble. (I assume you meant to refer to xzr?)
				david-armAuthorUnsubmitted Done Reply Inline Actions OK, I'll take a look. I built and ran a test for variants of v16i8, but missed this, david-arm: OK, I'll take a look. I built and ran a test for variants of v16i8, but missed this,
				david-armAuthorUnsubmitted Done Reply Inline Actions Hi Eli, I tried assembling this and it worked for me with this command: llc --filetype=obj -mtriple=aarch64-linux-gnu -mattr=+sve < ../llvm/test/CodeGen/AArch64/sve-extract-element.ll > test.o What command did you use? david-arm: Hi Eli, I tried assembling this and it worked for me with this command: llc --filetype=obj…
				efriedmaUnsubmitted Not Done Reply Inline Actions `echo "whilels p0.d, #0, x8" | llvm-mc -triple=aarch64 -mattr=+sve` efriedma: `echo "whilels p0.d, #0, x8" | llvm-mc -triple=aarch64 -mattr=+sve`
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 2 x double> %a, i32 9
				ret double %b
				}

				; Deliberately choose an index that is undefined
				define i32 @test_lane64_4xi32(<vscale x 4 x i32> %a) {
				; CHECK-LABEL: test_lane64_4xi32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z0.s, s0
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 4 x i32> %a, i32 undef
				ret i32 %b
				}

				define i8 @extract_of_insert_undef_16xi8(i8 %a) {
				; CHECK-LABEL: extract_of_insert_undef_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ret
				%b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
				%c = extractelement <vscale x 16 x i8> %b, i32 0
				ret i8 %c
				}

				define i8 @extract0_of_insert0_16xi8(<vscale x 16 x i8> %a, i8 %b) {
				; CHECK-LABEL: extract0_of_insert0_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ret
				%c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
				%d = extractelement <vscale x 16 x i8> %c, i32 0
				ret i8 %d
				}

				define i8 @extract64_of_insert64_16xi8(<vscale x 16 x i8> %a, i8 %b) {
				; CHECK-LABEL: extract64_of_insert64_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ret
				%c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 64
				%d = extractelement <vscale x 16 x i8> %c, i32 64
				ret i8 %d
				}

				define i8 @extract_of_insert_diff_lanes_16xi8(<vscale x 16 x i8> %a, i8 %b) {
				; CHECK-LABEL: extract_of_insert_diff_lanes_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z0.b, z0.b[3]
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				%c = insertelement <vscale x 16 x i8> %a, i8 %b, i32 0
				%d = extractelement <vscale x 16 x i8> %c, i32 3
				ret i8 %d
				}

				define i8 @test_lane0_zero_16xi8(<vscale x 16 x i8> %a) {
				; CHECK-LABEL: test_lane0_zero_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w0, wzr
				; CHECK-NEXT: ret
				%b = extractelement <vscale x 16 x i8> zeroinitializer, i32 0
				ret i8 %b
				}

				; The DAG combiner should fold the extract of a splat to give element zero
				; of the splat, i.e. %x. If the index is beyond the end of the scalable
				; vector the result is undefined anyway.
				define i64 @test_lanex_splat_2xi64(i64 %x, i32 %y) {
				; CHECK-LABEL: test_lanex_splat_2xi64:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ret
				%a = insertelement <vscale x 2 x i64> undef, i64 %x, i32 0
				%b = shufflevector <vscale x 2 x i64> %a, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
				%c = extractelement <vscale x 2 x i64> %b, i32 %y
				ret i64 %c
				}

llvm/test/CodeGen/AArch64/sve-insert-element.ll

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s \| FileCheck %s

	define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {			define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
	; CHECK-LABEL: test_lane0_16xi8			; CHECK-LABEL: test_lane0_16xi8:
	; CHECK: mov [[REG:.*]], #30			; CHECK: // %bb.0:
	; CHECK: mov z0.b, p{{[0-7]}}/m, [[REG]]			; CHECK-NEXT: ptrue p0.b, vl1
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.b, p0/m, w8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0			%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
	ret <vscale x 16 x i8> %b			ret <vscale x 16 x i8> %b
	}			}

	define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {			define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
	; CHECK-LABEL: test_lane0_8xi16			; CHECK-LABEL: test_lane0_8xi16:
	; CHECK: mov [[REG:.*]], #30			; CHECK: // %bb.0:
	; CHECK: mov z0.h, p{{[0-7]}}/m, [[REG]]			; CHECK-NEXT: ptrue p0.h, vl1
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.h, p0/m, w8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0			%b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
	ret <vscale x 8 x i16> %b			ret <vscale x 8 x i16> %b
	}			}

	define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {			define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
	; CHECK-LABEL: test_lane0_4xi32			; CHECK-LABEL: test_lane0_4xi32:
	; CHECK: mov [[REG:.*]], #30			; CHECK: // %bb.0:
	; CHECK: mov z0.s, p{{[0-7]}}/m, [[REG]]			; CHECK-NEXT: ptrue p0.s, vl1
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.s, p0/m, w8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0			%b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
	ret <vscale x 4 x i32> %b			ret <vscale x 4 x i32> %b
	}			}

	define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {			define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
	; CHECK-LABEL: test_lane0_2xi64			; CHECK-LABEL: test_lane0_2xi64:
	; CHECK: mov w[[REG:.*]], #30			; CHECK: // %bb.0:
	; CHECK: mov z0.d, p{{[0-7]}}/m, x[[REG]]			; CHECK-NEXT: ptrue p0.d, vl1
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.d, p0/m, x8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0			%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
	ret <vscale x 2 x i64> %b			ret <vscale x 2 x i64> %b
	}			}

	define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {			define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
	; CHECK-LABEL: test_lane0_2xf64			; CHECK-LABEL: test_lane0_2xf64:
	; CHECK: fmov d[[REG:[0-9]+]], #1.00000000			; CHECK: // %bb.0:
	; CHECK: mov z0.d, p{{[0-7]}}/m, z[[REG]].d			; CHECK-NEXT: fmov d1, #1.00000000
				; CHECK-NEXT: ptrue p0.d, vl1
				; CHECK-NEXT: mov z0.d, p0/m, z1.d
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0			%b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
	ret <vscale x 2 x double> %b			ret <vscale x 2 x double> %b
	}			}

	define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {			define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
	; CHECK-LABEL: test_lane0_4xf32			; CHECK-LABEL: test_lane0_4xf32:
	; CHECK: fmov s[[REG:[0-9]+]], #1.00000000			; CHECK: // %bb.0:
	; CHECK: mov z0.s, p{{[0-7]}}/m, z[[REG]].s			; CHECK-NEXT: fmov s1, #1.00000000
				; CHECK-NEXT: ptrue p0.s, vl1
				; CHECK-NEXT: mov z0.s, p0/m, z1.s
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0			%b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
	ret <vscale x 4 x float> %b			ret <vscale x 4 x float> %b
	}			}

	define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {			define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
	; CHECK-LABEL: test_lane0_8xf16			; CHECK-LABEL: test_lane0_8xf16:
	; CHECK: fmov h[[REG:[0-9]+]], #1.00000000			; CHECK: // %bb.0:
	; CHECK: mov z0.h, p{{[0-7]}}/m, z[[REG]].h			; CHECK-NEXT: fmov h1, #1.00000000
				; CHECK-NEXT: ptrue p0.h, vl1
				; CHECK-NEXT: mov z0.h, p0/m, z1.h
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0			%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
	ret <vscale x 8 x half> %b			ret <vscale x 8 x half> %b
	}			}

	; Undefined lane insert			; Undefined lane insert
	define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {			define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
	; CHECK-LABEL: test_lane4_2xi64			; CHECK-LABEL: test_lane4_2xi64:
	; CHECK: mov w[[IDXREG:.*]], #4			; CHECK: // %bb.0:
	; CHECK: index z[[CMPVEC:[0-9]+]].d, #0, #1			; CHECK-NEXT: mov w8, #4
	; CHECK: mov z[[IDXVEC:[0-9]+]].d, x[[IDXREG]]			; CHECK-NEXT: index z1.d, #0, #1
	; CHECK: cmpeq p[[PRED:[0-9]+]].d, p{{[0-7]}}/z, z[[CMPVEC]].d, z[[IDXVEC]].d			; CHECK-NEXT: ptrue p0.d
	; CHECK: mov w[[VALREG:.*]], #30			; CHECK-NEXT: mov z2.d, x8
	; CHECK: mov z0.d, p[[PRED]]/m, x[[VALREG]]			; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.d, p0/m, x8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4			%b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
	ret <vscale x 2 x i64> %b			ret <vscale x 2 x i64> %b
	}			}

	; Undefined lane insert			; Undefined lane insert
	define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {			define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
	; CHECK-LABEL: test_lane9_8xf16			; CHECK-LABEL: test_lane9_8xf16:
	; CHECK: mov w[[IDXREG:.*]], #9			; CHECK: // %bb.0:
	; CHECK: index z[[CMPVEC:[0-9]+]].h, #0, #1			; CHECK-NEXT: mov w8, #9
	; CHECK: mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]			; CHECK-NEXT: index z1.h, #0, #1
	; CHECK: cmpeq p[[PRED:[0-9]+]].h, p{{[0-7]}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h			; CHECK-NEXT: ptrue p0.h
	; CHECK: fmov h[[VALREG:[0-9]+]], #1.00000000			; CHECK-NEXT: mov z2.h, w8
	; CHECK: mov z0.h, p[[PRED]]/m, h[[VALREG]]			; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
				; CHECK-NEXT: fmov h1, #1.00000000
				; CHECK-NEXT: mov z0.h, p0/m, h1
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9			%b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
	ret <vscale x 8 x half> %b			ret <vscale x 8 x half> %b
	}			}

	; Inserts the constant i8 30 into lane 1 of the scalable vector %a and returns the result.
	; Lane 1 lies within the 16-element minimum, so it is in range for every vscale.
	define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {			define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
	; CHECK-LABEL: test_lane1_16xi8			; CHECK-LABEL: test_lane1_16xi8:
	; CHECK: mov w[[IDXREG:.*]], #1			; CHECK: // %bb.0:
	; CHECK: index z[[CMPVEC:[0-9]+]].b, #0, #1			; CHECK-NEXT: mov w8, #1
	; CHECK: mov z[[IDXVEC:[0-9]+]].b, w[[IDXREG]]			; CHECK-NEXT: index z1.b, #0, #1
	; CHECK: cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b			; CHECK-NEXT: ptrue p0.b
	; CHECK: mov w[[VALREG:.*]], #30			; CHECK-NEXT: mov z2.b, w8
	; CHECK: mov z0.b, p[[PRED]]/m, w[[VALREG]]			; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.b, p0/m, w8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1			%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
	ret <vscale x 16 x i8> %b			ret <vscale x 16 x i8> %b
	}			}

	; Inserts the constant i8 30 into the scalable vector %a at the run-time lane index %x
	; (an i32 argument) and returns the result.
	define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {			define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
	; CHECK-LABEL: test_lanex_16xi8			; CHECK-LABEL: test_lanex_16xi8:
	; CHECK: index z[[CMPVEC:[0-9]+]].b, #0, #1			; CHECK: // %bb.0:
	; CHECK: mov z[[IDXVEC:[0-9]+]].b, w[[IDXREG]]			; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
	; CHECK: cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b			; CHECK-NEXT: sxtw x8, w0
	; CHECK: mov w[[VALREG:.*]], #30			; CHECK-NEXT: index z1.b, #0, #1
	; CHECK: mov z0.b, p[[PRED]]/m, w[[VALREG]]			; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: mov z2.b, w8
				; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
				; CHECK-NEXT: mov w8, #30
				; CHECK-NEXT: mov z0.b, p0/m, w8
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x			%b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
	ret <vscale x 16 x i8> %b			ret <vscale x 16 x i8> %b
	}			}


	; Redundant lane insert			; Redundant lane insert
	; Reads lane 2 of %a and writes that same value back into lane 2 of %a, so the
	; returned vector is equal to %a.
	define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {			define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
	; CHECK-LABEL: extract_insert_4xi32			; CHECK-LABEL: extract_insert_4xi32:
	; CHECK-NOT: mov w{{.*}}, #30			; CHECK: // %bb.0:
	; CHECK-NOT: mov z0.d			; CHECK-NEXT: ret
	%b = extractelement <vscale x 4 x i32> %a, i32 2			%b = extractelement <vscale x 4 x i32> %a, i32 2
	%c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2			%c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
	ret <vscale x 4 x i32> %c			ret <vscale x 4 x i32> %c
	}			}

	; Inserts the scalar argument %a into lane 6 of an undef <vscale x 8 x i16> vector
	; and returns the result; all other lanes remain undef.
	define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {			define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
	; CHECK-LABEL: test_lane6_undef_8xi16			; CHECK-LABEL: test_lane6_undef_8xi16:
	; CHECK: mov w[[IDXREG:.*]], #6			; CHECK: // %bb.0:
	; CHECK: index z[[CMPVEC:.*]].h, #0, #1			; CHECK-NEXT: mov w8, #6
	; CHECK: mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]			; CHECK-NEXT: index z0.h, #0, #1
	; CHECK: cmpeq p[[PRED:.]].h, p{{.}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h			; CHECK-NEXT: mov z1.h, w8
	; CHECK: mov z0.h, p[[PRED]]/m, w0			; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
				; CHECK-NEXT: mov z0.h, p0/m, w0
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6			%b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
	ret <vscale x 8 x i16> %b			ret <vscale x 8 x i16> %b
	}			}

	; Inserts the scalar argument %a into lane 0 of an undef <vscale x 16 x i8> vector
	; and returns the result; the expected output is a single fmov into s0.
	define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {			define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
	; CHECK-LABEL: test_lane0_undef_16xi8			; CHECK-LABEL: test_lane0_undef_16xi8:
	; CHECK: fmov s0, w0			; CHECK: // %bb.0:
				; CHECK-NEXT: fmov s0, w0
				; CHECK-NEXT: ret
	%b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0			%b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
	ret <vscale x 16 x i8> %b			ret <vscale x 16 x i8> %b
	}			}

				; Copies lane 0 of %b into lane 0 of %a and returns the updated %a.
				; Source and destination use the same lane index but different vectors.
				define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
				; CHECK-LABEL: test_insert0_of_extract0_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z1.b, b1
				; CHECK-NEXT: ptrue p0.b, vl1
				; CHECK-NEXT: fmov w8, s1
				; CHECK-NEXT: mov z0.b, p0/m, w8
				; CHECK-NEXT: ret
				%c = extractelement <vscale x 16 x i8> %b, i32 0
				%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
				ret <vscale x 16 x i8> %d
				}

				; Copies lane 64 of %b into lane 64 of %a and returns the updated %a.
				; Lane 64 is beyond the minimum of 16 elements, so it is in range only when vscale is at least 5.
				define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
				; CHECK-LABEL: test_insert64_of_extract64_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #64
				; CHECK-NEXT: index z2.b, #0, #1
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: whilels p1.b, xzr, x8
				; CHECK-NEXT: mov z3.b, w8
				; CHECK-NEXT: lastb w8, p1, z1.b
				; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
				; CHECK-NEXT: mov z0.b, p0/m, w8
				; CHECK-NEXT: ret
				%c = extractelement <vscale x 16 x i8> %b, i32 64
				%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
				ret <vscale x 16 x i8> %d
				}

				; Copies lane 1 of %b into lane 3 of %a and returns the updated %a.
				; Unlike the two preceding functions, the source and destination lane indices differ.
				define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
				; CHECK-LABEL: test_insert3_of_extract1_16xi8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z1.b, z1.b[1]
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: index z2.b, #0, #1
				; CHECK-NEXT: fmov w9, s1
				; CHECK-NEXT: mov z1.b, w8
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b
				; CHECK-NEXT: mov z0.b, p0/m, w9
				; CHECK-NEXT: ret
				%c = extractelement <vscale x 16 x i8> %b, i32 1
				%d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
				ret <vscale x 16 x i8> %d
				}

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Add support for extracting elements of scalable vectors
Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 266762

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-extract-element.ll

llvm/test/CodeGen/AArch64/sve-insert-element.ll

This is an archive of the discontinued LLVM Phabricator instance.

[CodeGen] Add support for extracting elements of scalable vectors (Closed · Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 266762

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/test/CodeGen/AArch64/sve-extract-element.ll

llvm/test/CodeGen/AArch64/sve-insert-element.ll

[CodeGen] Add support for extracting elements of scalable vectors
Closed · Public