Diff 284665

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Show First 20 Lines • Show All 1,450 Lines • ▼ Show 20 Lines	void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue Vec = N->getOperand(0);		SDValue Vec = N->getOperand(0);
SDValue Elt = N->getOperand(1);		SDValue Elt = N->getOperand(1);
SDValue Idx = N->getOperand(2);		SDValue Idx = N->getOperand(2);
SDLoc dl(N);		SDLoc dl(N);
GetSplitVector(Vec, Lo, Hi);		GetSplitVector(Vec, Lo, Hi);

if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {		if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = CIdx->getZExtValue();		unsigned IdxVal = CIdx->getZExtValue();
unsigned LoNumElts = Lo.getValueType().getVectorNumElements();		unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements();
if (IdxVal < LoNumElts)		if (IdxVal < LoNumElts) {
		david-arm (Unsubmitted, Done): Maybe call getVectorMinNumElements() instead?
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,		Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);		Lo.getValueType(), Lo, Elt, Idx);
else		return;
		} else if (!Vec.getValueType().isScalableVector()) {
Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,		Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));		DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));
return;		return;
}		}
		}

// See if the target wants to custom expand this node.		// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(0), true))		if (CustomLowerNode(N, N->getValueType(0), true))
return;		return;

// Make the vector elements byte-addressable if they aren't already.		// Make the vector elements byte-addressable if they aren't already.
EVT VecVT = Vec.getValueType();		EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();		EVT EltVT = VecVT.getVectorElementType();
if (VecVT.getScalarSizeInBits() < 8) {		if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;		EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,		VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());		VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);		Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
// Extend the element type to match if needed.		// Extend the element type to match if needed.
if (EltVT.bitsGT(Elt.getValueType()))		if (EltVT.bitsGT(Elt.getValueType()))
Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);		Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
}		}

// Spill the vector to the stack.		// Spill the vector to the stack.
// In cases where the vector is illegal it will be broken down into parts		// In cases where the vector is illegal it will be broken down into parts
// and stored in parts - we should use the alignment for the smallest part.		// and stored in parts - we should use the alignment for the smallest part.
Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);		Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
SDValue StackPtr =		SDValue StackPtr =
DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);		DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
auto &MF = DAG.getMachineFunction();		auto &MF = DAG.getMachineFunction();
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();		auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);		auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,		SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
SmallestAlign);		SmallestAlign);

// Store the new element. This may be larger than the vector element type,		// Store the new element. This may be larger than the vector element type,
// so use a truncating store.		// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);		SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Store = DAG.getTruncStore(		Store = DAG.getTruncStore(
Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,		Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));		commonAlignment(SmallestAlign,
		EltVT.getSizeInBits().getFixedSize() / 8));
		efriedma (Unsubmitted, Done): getFixedSize()?

EVT LoVT, HiVT;		EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);		std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);

// Load the Lo part from the stack slot.		// Load the Lo part from the stack slot.
Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);		Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);

// Increment the pointer to the other part.		// Increment the pointer to the other part.
unsigned IncrementSize = LoVT.getSizeInBits() / 8;		auto Load = cast<LoadSDNode>(Lo);
		david-arm (Unsubmitted, Not Done): It might be cleaner to do this now: `TypeSize IncrementSize = LoVT.getSizeInBits() / 8;` and then pass that directly to getMemBasePlusOffset?
		kmclaughlin (Author, Unsubmitted, Not Done): I used your suggestion below to replace the call to `getMemBasePlusOffset` with a call to the new `IncrementPointer` function, which removed the need to calculate IncrementSize here.
StackPtr =		MachinePointerInfo MPI = Load->getPointerInfo();
		efriedma (Unsubmitted, Not Done): Can we pass a TypeSize here, instead of passing the offset and scalable bit separately?
		kmclaughlin (Author, Unsubmitted, Not Done): Since TypeSize uses uint64_t, I thought it would be better to pass in IsScalable as a separate argument so that we can keep the Offset argument as a signed value.
		efriedma (Unsubmitted, Not Done): We could introduce a dedicated TypeOffset type if you think that's really an issue. I suspect getMemBasePlusOffset is rarely, if ever, used with a negative offset, though. Maybe add an overload that takes a TypeSize and go from there. I really want to avoid getKnownMinSize() where it isn't necessary; it's going to be a lot easier to make mistakes passing around possibly-scaled offsets as bare integers.
		kmclaughlin (Author, Unsubmitted, Not Done): I had a look through the uses of getMemBasePlusOffset (and getObjectPtrOffset) and I think you're right that it is not used with negative offsets, so I created a separate patch which changes the type of the Offset argument rather than adding an overload.
DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);		IncrementPointer(Load, LoVT, MPI, StackPtr);

// Load the Hi part from the stack slot.		Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign);
		david-arm (Unsubmitted, Not Done): I think this might be broken now, since the offset doesn't make sense for scalable vectors. Maybe you can replace the DAG.getMemBasePlusOffset call with the new IncrementPointer helper function? That would also give you a MachinePointerInfo object that you can pass to the load here. I've started making use of IncrementPointer in my patches too.
		kmclaughlin (Author, Unsubmitted, Not Done): I've replaced the call here as suggested, though the changes I've made in this patch to getMemBasePlusOffset are still necessary to ensure the immediate is clamped correctly (it is used by getVectorElementPointer).
Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
PtrInfo.getWithOffset(IncrementSize), SmallestAlign);

		david-arm (Unsubmitted, Done): nit: Maybe drop the Load->getMemOperand()->getFlags() and Load->getAAInfo() arguments, since the original version didn't have them?
// If we adjusted the original type, we need to truncate the results.		// If we adjusted the original type, we need to truncate the results.
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));		std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
if (LoVT != Lo.getValueType())		if (LoVT != Lo.getValueType())
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);		Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
if (HiVT != Hi.getValueType())		if (HiVT != Hi.getValueType())
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);		Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}		}

▲ Show 20 Lines • Show All 3,757 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,019 Lines • ▼ Show 20 Lines	if (RedAlign2 < RedAlign)
RedAlign = RedAlign2;		RedAlign = RedAlign2;
}		}

return RedAlign;		return RedAlign;
}		}

SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {		SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
MachineFrameInfo &MFI = MF->getFrameInfo();		MachineFrameInfo &MFI = MF->getFrameInfo();
int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false);		const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
		int StackID = 0;
		if (Bytes.isScalable())
		StackID = TFI->getStackIDForScalableVectors();
		int FrameIdx = MFI.CreateStackObject(Bytes, Alignment,
		false, nullptr, StackID);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));		return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}		}

SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {		SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
Type *Ty = VT.getTypeForEVT(*getContext());		Type *Ty = VT.getTypeForEVT(*getContext());
Align StackAlign =		Align StackAlign =
std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));		std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign));
return CreateStackTemporary(VT.getStoreSize(), StackAlign);		return CreateStackTemporary(VT.getStoreSize(), StackAlign);
▲ Show 20 Lines • Show All 3,895 Lines • ▼ Show 20 Lines	if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
return DAG.getConstant(Val, dl, VT);		return DAG.getConstant(Val, dl, VT);
return SDValue(nullptr, 0);		return SDValue(nullptr, 0);
}		}

SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,		SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
const SDLoc &DL,		const SDLoc &DL,
const SDNodeFlags Flags) {		const SDNodeFlags Flags) {
EVT VT = Base.getValueType();		EVT VT = Base.getValueType();
return getMemBasePlusOffset(Base, getConstant(Offset.getFixedSize(), DL, VT),		SDValue Index;
DL, Flags);
		if (Offset.isScalable())
		Index = getVScale(DL, Base.getValueType(),
		APInt(Base.getValueSizeInBits().getFixedSize(),
		Offset.getKnownMinSize()));
		else
		Index = getConstant(Offset.getFixedSize(), DL, VT);

		return getMemBasePlusOffset(Base, Index, DL, Flags);
}		}

SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,		SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,
const SDLoc &DL,		const SDLoc &DL,
const SDNodeFlags Flags) {		const SDNodeFlags Flags) {
assert(Offset.getValueType().isInteger());		assert(Offset.getValueType().isInteger());
EVT BasePtrVT = Ptr.getValueType();		EVT BasePtrVT = Ptr.getValueType();
return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags);		return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags);
▲ Show 20 Lines • Show All 4,038 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,191 Lines • ▼ Show 20 Lines	TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,

return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);		return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}		}

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,		static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
SDValue Idx,		SDValue Idx,
EVT VecVT,		EVT VecVT,
const SDLoc &dl) {		const SDLoc &dl) {
if (isa<ConstantSDNode>(Idx))		if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
return Idx;		return Idx;

EVT IdxVT = Idx.getValueType();		EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorNumElements();		unsigned NElts = VecVT.getVectorMinNumElements();
		if (VecVT.isScalableVector()) {
		SDValue VS = DAG.getVScale(dl, IdxVT,
		APInt(IdxVT.getSizeInBits().getFixedSize(),
		efriedma (Unsubmitted, Done): getFixedSize()?
		NElts));
		SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS,
		DAG.getConstant(1, dl, IdxVT));

		return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
		efriedma (Unsubmitted, Not Done): The non-scalable path uses masking; I guess you can't do that here because the element count might not be a power of two? Given that, the end result here isn't great, but I can't come up with anything better.
		kmclaughlin (Author, Unsubmitted, Not Done): Yes, masking wasn't used here since we can't be sure that `NElts` is a power of two.
		} else {
if (isPowerOf2_32(NElts)) {		if (isPowerOf2_32(NElts)) {
APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),		APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
Log2_32(NElts));		Log2_32(NElts));
return DAG.getNode(ISD::AND, dl, IdxVT, Idx,		return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
DAG.getConstant(Imm, dl, IdxVT));		DAG.getConstant(Imm, dl, IdxVT));
}		}
		}

return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,		return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
DAG.getConstant(NElts - 1, dl, IdxVT));		DAG.getConstant(NElts - 1, dl, IdxVT));
}		}

SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,		SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue VecPtr, EVT VecVT,		SDValue VecPtr, EVT VecVT,
SDValue Index) const {		SDValue Index) const {
SDLoc dl(Index);		SDLoc dl(Index);
// Make sure the index type is big enough to compute in.		// Make sure the index type is big enough to compute in.
Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());		Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

EVT EltVT = VecVT.getVectorElementType();		EVT EltVT = VecVT.getVectorElementType();

// Calculate the element offset and add it to the pointer.		// Calculate the element offset and add it to the pointer.
unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.		unsigned EltSize = EltVT.getSizeInBits().getFixedSize() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getSizeInBits() &&		assert(EltSize * 8 == EltVT.getSizeInBits().getFixedSize() &&
		efriedma (Unsubmitted, Done): getFixedSize()?
"Converting bits to bytes lost precision");		"Converting bits to bytes lost precision");

Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);		Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);

EVT IdxVT = Index.getValueType();		EVT IdxVT = Index.getValueType();

Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,		Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));		DAG.getConstant(EltSize, dl, IdxVT));
return DAG.getMemBasePlusOffset(VecPtr, Index, dl);		return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
		david-arm (Unsubmitted, Done): nit: Stray new line here.
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model		// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,		SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
▲ Show 20 Lines • Show All 700 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

				; INSERT VECTOR ELT

				define <vscale x 8 x i8> @promote_insert_8i8(<vscale x 8 x i8> %a, i8 %elt, i64 %idx) {
				; CHECK-LABEL: promote_insert_8i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov z1.h, w1
				; CHECK-NEXT: index z2.h, #0, #1
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h
				; CHECK-NEXT: mov z0.h, p0/m, w0
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 8 x i8> %a, i8 %elt, i64 %idx
				ret <vscale x 8 x i8> %ins
				}

				define <vscale x 32 x i8> @split_insert_32i8_idx(<vscale x 32 x i8> %a, i8 %elt, i64 %idx) {
				; CHECK-LABEL: split_insert_32i8_idx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-2
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: rdvl x8, #2
				; CHECK-NEXT: sub x8, x8, #1 // =1
				; CHECK-NEXT: cmp x1, x8
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: csel x8, x1, x8, lo
				; CHECK-NEXT: mov x9, sp
				; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
				; CHECK-NEXT: st1b { z0.b }, p0, [sp]
				; CHECK-NEXT: strb w0, [x9, x8]
				; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl]
				; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
				; CHECK-NEXT: addvl sp, sp, #2
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 32 x i8> %a, i8 %elt, i64 %idx
				ret <vscale x 32 x i8> %ins
				}

				define <vscale x 8 x float> @split_insert_8f32_idx(<vscale x 8 x float> %a, float %elt, i64 %idx) {
				; CHECK-LABEL: split_insert_8f32_idx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-2
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: cnth x8
				; CHECK-NEXT: sub x8, x8, #1 // =1
				; CHECK-NEXT: cmp x0, x8
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: csel x8, x0, x8, lo
				; CHECK-NEXT: mov x9, sp
				; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
				; CHECK-NEXT: st1w { z0.s }, p0, [sp]
				; CHECK-NEXT: str s2, [x9, x8, lsl #2]
				; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl]
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
				; CHECK-NEXT: addvl sp, sp, #2
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 8 x float> %a, float %elt, i64 %idx
				ret <vscale x 8 x float> %ins
				}

				define <vscale x 8 x i64> @split_insert_8i64_idx(<vscale x 8 x i64> %a, i64 %elt, i64 %idx) {
				; CHECK-LABEL: split_insert_8i64_idx:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-4
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: cnth x8
				; CHECK-NEXT: sub x8, x8, #1 // =1
				; CHECK-NEXT: cmp x1, x8
				; CHECK-NEXT: ptrue p0.d
				; CHECK-NEXT: csel x8, x1, x8, lo
				; CHECK-NEXT: mov x9, sp
				; CHECK-NEXT: st1d { z3.d }, p0, [x9, #3, mul vl]
				; CHECK-NEXT: st1d { z2.d }, p0, [x9, #2, mul vl]
				; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl]
				; CHECK-NEXT: st1d { z0.d }, p0, [sp]
				; CHECK-NEXT: str x0, [x9, x8, lsl #3]
				; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9, #1, mul vl]
				; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9, #2, mul vl]
				; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl]
				; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
				; CHECK-NEXT: addvl sp, sp, #4
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 8 x i64> %a, i64 %elt, i64 %idx
				ret <vscale x 8 x i64> %ins
				}

				; INSERT VECTOR ELT, CONSTANT IDX

				define <vscale x 4 x i16> @promote_insert_4i16(<vscale x 4 x i16> %a, i16 %elt) {
				; CHECK-LABEL: promote_insert_4i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #5
				; CHECK-NEXT: index z1.s, #0, #1
				; CHECK-NEXT: mov z2.s, w8
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
				; CHECK-NEXT: mov z0.s, p0/m, w0
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 4 x i16> %a, i16 %elt, i64 5
				ret <vscale x 4 x i16> %ins
				}

				; In this test, the index is small enough that we know it will be in the
				; low half of the vector and there is no need to go through the stack as
				; done in the remaining tests
				define <vscale x 32 x i8> @split_insert_32i8(<vscale x 32 x i8> %a, i8 %elt) {
				; CHECK-LABEL: split_insert_32i8:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: index z2.b, #0, #1
				; CHECK-NEXT: mov z3.b, w8
				; CHECK-NEXT: ptrue p0.b
				; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
				; CHECK-NEXT: mov z0.b, p0/m, w0
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 32 x i8> %a, i8 %elt, i64 3
				ret <vscale x 32 x i8> %ins
				}

				define <vscale x 32 x i16> @split_insert_32i16(<vscale x 32 x i16> %a, i16 %elt) {
				; CHECK-LABEL: split_insert_32i16:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-4
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: rdvl x10, #2
				; CHECK-NEXT: sub x10, x10, #1 // =1
				; CHECK-NEXT: mov w9, #128
				; CHECK-NEXT: cmp x10, #128 // =128
				; CHECK-NEXT: ptrue p0.h
				; CHECK-NEXT: mov x8, sp
				; CHECK-NEXT: csel x9, x10, x9, lo
				; CHECK-NEXT: st1h { z3.h }, p0, [x8, #3, mul vl]
				; CHECK-NEXT: st1h { z2.h }, p0, [x8, #2, mul vl]
				; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
				; CHECK-NEXT: st1h { z0.h }, p0, [sp]
				; CHECK-NEXT: strh w0, [x8, x9, lsl #1]
				; CHECK-NEXT: ld1h { z1.h }, p0/z, [x8, #1, mul vl]
				; CHECK-NEXT: ld1h { z2.h }, p0/z, [x8, #2, mul vl]
				; CHECK-NEXT: ld1h { z3.h }, p0/z, [x8, #3, mul vl]
				; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
				; CHECK-NEXT: addvl sp, sp, #4
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 32 x i16> %a, i16 %elt, i64 128
				ret <vscale x 32 x i16> %ins
				}

				define <vscale x 8 x i32> @split_insert_8i32(<vscale x 8 x i32> %a, i32 %elt) {
				; CHECK-LABEL: split_insert_8i32:
				; CHECK: // %bb.0:
				; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
				; CHECK-NEXT: addvl sp, sp, #-2
				; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
				; CHECK-NEXT: .cfi_offset w29, -16
				; CHECK-NEXT: mov w9, #16960
				; CHECK-NEXT: cnth x10
				; CHECK-NEXT: movk w9, #15, lsl #16
				; CHECK-NEXT: sub x10, x10, #1 // =1
				; CHECK-NEXT: cmp x10, x9
				; CHECK-NEXT: ptrue p0.s
				; CHECK-NEXT: mov x8, sp
				; CHECK-NEXT: csel x9, x10, x9, lo
				; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
				; CHECK-NEXT: st1w { z0.s }, p0, [sp]
				; CHECK-NEXT: str w0, [x8, x9, lsl #2]
				; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
				; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
				; CHECK-NEXT: addvl sp, sp, #2
				; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
				; CHECK-NEXT: ret
				%ins = insertelement <vscale x 8 x i32> %a, i32 %elt, i64 1000000
				ret <vscale x 8 x i32> %ins
				}

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Legalisation of INSERT_VECTOR_ELT for scalable vectors
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 284665

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SVE][CodeGen] Legalisation of INSERT_VECTOR_ELT for scalable vectors
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 284665

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll

[SVE][CodeGen] Legalisation of INSERT_VECTOR_ELT for scalable vectors
ClosedPublic